| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.01, |
| "eval_steps": 1000, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1e-05, |
| "grad_norm": 1.7941410541534424, |
| "learning_rate": 0.010000000000000002, |
| "loss": 2.8372, |
| "loss/crossentropy": 2.6197855472564697, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21744851768016815, |
| "loss/reg": 0.19270801544189453, |
| "step": 1 |
| }, |
| { |
| "epoch": 2e-05, |
| "grad_norm": 1.8685554265975952, |
| "learning_rate": 0.020000000000000004, |
| "loss": 3.0179, |
| "loss/crossentropy": 2.7890021800994873, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22886505722999573, |
| "loss/reg": 0.19270801544189453, |
| "step": 2 |
| }, |
| { |
| "epoch": 3e-05, |
| "grad_norm": 1.6844093799591064, |
| "learning_rate": 0.03, |
| "loss": 2.8727, |
| "loss/crossentropy": 2.6652822494506836, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2074647843837738, |
| "loss/reg": 0.19247663021087646, |
| "step": 3 |
| }, |
| { |
| "epoch": 4e-05, |
| "grad_norm": 1.7308602333068848, |
| "learning_rate": 0.04000000000000001, |
| "loss": 3.0104, |
| "loss/crossentropy": 2.792520523071289, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21790620684623718, |
| "loss/reg": 0.19200405478477478, |
| "step": 4 |
| }, |
| { |
| "epoch": 5e-05, |
| "grad_norm": 1.8790887594223022, |
| "learning_rate": 0.05, |
| "loss": 2.8305, |
| "loss/crossentropy": 2.5941758155822754, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23635630309581757, |
| "loss/reg": 0.1913727968931198, |
| "step": 5 |
| }, |
| { |
| "epoch": 6e-05, |
| "grad_norm": 1.5929267406463623, |
| "learning_rate": 0.06, |
| "loss": 2.9442, |
| "loss/crossentropy": 2.7414002418518066, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20281994342803955, |
| "loss/reg": 0.1907350867986679, |
| "step": 6 |
| }, |
| { |
| "epoch": 7e-05, |
| "grad_norm": 1.71002995967865, |
| "learning_rate": 0.06999999999999999, |
| "loss": 2.8114, |
| "loss/crossentropy": 2.5980405807495117, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21340824663639069, |
| "loss/reg": 0.1901981383562088, |
| "step": 7 |
| }, |
| { |
| "epoch": 8e-05, |
| "grad_norm": 1.7461140155792236, |
| "learning_rate": 0.08000000000000002, |
| "loss": 2.3979, |
| "loss/crossentropy": 2.175525665283203, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22232553362846375, |
| "loss/reg": 0.18929249048233032, |
| "step": 8 |
| }, |
| { |
| "epoch": 9e-05, |
| "grad_norm": 1.804502248764038, |
| "learning_rate": 0.09000000000000001, |
| "loss": 2.8196, |
| "loss/crossentropy": 2.568608283996582, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.25096768140792847, |
| "loss/reg": 0.1886214315891266, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0001, |
| "grad_norm": 1.781426191329956, |
| "learning_rate": 0.1, |
| "loss": 2.5661, |
| "loss/crossentropy": 2.3451571464538574, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22095344960689545, |
| "loss/reg": 0.18797847628593445, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00011, |
| "grad_norm": 1.7866382598876953, |
| "learning_rate": 0.1, |
| "loss": 2.7191, |
| "loss/crossentropy": 2.5106096267700195, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20849493145942688, |
| "loss/reg": 0.18714258074760437, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00012, |
| "grad_norm": 3.4358882904052734, |
| "learning_rate": 0.1, |
| "loss": 2.5222, |
| "loss/crossentropy": 2.2724475860595703, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24974870681762695, |
| "loss/reg": 0.18626242876052856, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.00013, |
| "grad_norm": 2.0597925186157227, |
| "learning_rate": 0.1, |
| "loss": 3.0295, |
| "loss/crossentropy": 2.8004751205444336, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22903449833393097, |
| "loss/reg": 0.18563689291477203, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.00014, |
| "grad_norm": 1.967074990272522, |
| "learning_rate": 0.1, |
| "loss": 2.7617, |
| "loss/crossentropy": 2.52266001701355, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2390647530555725, |
| "loss/reg": 0.18485110998153687, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.00015, |
| "grad_norm": 2.981250762939453, |
| "learning_rate": 0.1, |
| "loss": 2.5548, |
| "loss/crossentropy": 2.2688779830932617, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2858789563179016, |
| "loss/reg": 0.18405626714229584, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.00016, |
| "grad_norm": 1.99958336353302, |
| "grad_norm_var": 0.24795925819279455, |
| "learning_rate": 0.1, |
| "loss": 2.7223, |
| "loss/crossentropy": 2.499023914337158, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2233087718486786, |
| "loss/reg": 0.18341079354286194, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.00017, |
| "grad_norm": 2.0663859844207764, |
| "grad_norm_var": 0.24552223875209614, |
| "learning_rate": 0.1, |
| "loss": 2.649, |
| "loss/crossentropy": 2.4166884422302246, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23236128687858582, |
| "loss/reg": 0.18267498910427094, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00018, |
| "grad_norm": 1.8407412767410278, |
| "grad_norm_var": 0.2460799695739396, |
| "learning_rate": 0.1, |
| "loss": 2.8255, |
| "loss/crossentropy": 2.6198925971984863, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20556053519248962, |
| "loss/reg": 0.18212132155895233, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.00019, |
| "grad_norm": 1.864543080329895, |
| "grad_norm_var": 0.2404280343313161, |
| "learning_rate": 0.1, |
| "loss": 2.8002, |
| "loss/crossentropy": 2.5774385929107666, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22274565696716309, |
| "loss/reg": 0.1815088838338852, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0002, |
| "grad_norm": 1.8185856342315674, |
| "grad_norm_var": 0.2375805098991556, |
| "learning_rate": 0.1, |
| "loss": 2.9103, |
| "loss/crossentropy": 2.6820335388183594, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22825166583061218, |
| "loss/reg": 0.1807718276977539, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00021, |
| "grad_norm": 2.4235215187072754, |
| "grad_norm_var": 0.2458109438661452, |
| "learning_rate": 0.1, |
| "loss": 2.7855, |
| "loss/crossentropy": 2.5037312507629395, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2817690372467041, |
| "loss/reg": 0.18035979568958282, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.00022, |
| "grad_norm": 1.9809528589248657, |
| "grad_norm_var": 0.2313182294669159, |
| "learning_rate": 0.1, |
| "loss": 2.9751, |
| "loss/crossentropy": 2.7493302822113037, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22574451565742493, |
| "loss/reg": 0.17979250848293304, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.00023, |
| "grad_norm": 2.034888744354248, |
| "grad_norm_var": 0.22192409655540496, |
| "learning_rate": 0.1, |
| "loss": 2.7843, |
| "loss/crossentropy": 2.519683599472046, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.26464781165122986, |
| "loss/reg": 0.17909730970859528, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.00024, |
| "grad_norm": 1.733581781387329, |
| "grad_norm_var": 0.22252439655172077, |
| "learning_rate": 0.1, |
| "loss": 2.8826, |
| "loss/crossentropy": 2.6788527965545654, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2037709355354309, |
| "loss/reg": 0.17869915068149567, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.00025, |
| "grad_norm": 1.849076747894287, |
| "grad_norm_var": 0.2209000227649738, |
| "learning_rate": 0.1, |
| "loss": 2.5397, |
| "loss/crossentropy": 2.3268229961395264, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21282871067523956, |
| "loss/reg": 0.17821018397808075, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.00026, |
| "grad_norm": 1.691557765007019, |
| "grad_norm_var": 0.22524001331960122, |
| "learning_rate": 0.1, |
| "loss": 2.737, |
| "loss/crossentropy": 2.528177499771118, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2087726891040802, |
| "loss/reg": 0.17761024832725525, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.00027, |
| "grad_norm": 4.388544082641602, |
| "grad_norm_var": 0.5410776042381009, |
| "learning_rate": 0.1, |
| "loss": 3.5201, |
| "loss/crossentropy": 3.2137458324432373, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.3063773512840271, |
| "loss/reg": 0.17715643346309662, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.00028, |
| "grad_norm": 2.4487318992614746, |
| "grad_norm_var": 0.44701336120503826, |
| "learning_rate": 0.1, |
| "loss": 2.8093, |
| "loss/crossentropy": 2.560337543487549, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24897679686546326, |
| "loss/reg": 0.17665596306324005, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.00029, |
| "grad_norm": 1.9202386140823364, |
| "grad_norm_var": 0.45077990596471784, |
| "learning_rate": 0.1, |
| "loss": 2.8007, |
| "loss/crossentropy": 2.5765867233276367, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22409090399742126, |
| "loss/reg": 0.17621678113937378, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0003, |
| "grad_norm": 1.9063693284988403, |
| "grad_norm_var": 0.45279905302935924, |
| "learning_rate": 0.1, |
| "loss": 2.5179, |
| "loss/crossentropy": 2.2874815464019775, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23042911291122437, |
| "loss/reg": 0.17579925060272217, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.00031, |
| "grad_norm": 1.70511794090271, |
| "grad_norm_var": 0.4189766137661109, |
| "learning_rate": 0.1, |
| "loss": 2.8428, |
| "loss/crossentropy": 2.643998146057129, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1987657994031906, |
| "loss/reg": 0.17528142035007477, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.00032, |
| "grad_norm": 1.749182939529419, |
| "grad_norm_var": 0.426399087435419, |
| "learning_rate": 0.1, |
| "loss": 2.6153, |
| "loss/crossentropy": 2.4096920490264893, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20559975504875183, |
| "loss/reg": 0.17501850426197052, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.00033, |
| "grad_norm": 1.7957587242126465, |
| "grad_norm_var": 0.43178806302983236, |
| "learning_rate": 0.1, |
| "loss": 2.6359, |
| "loss/crossentropy": 2.407822370529175, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22803781926631927, |
| "loss/reg": 0.17458827793598175, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.00034, |
| "grad_norm": 2.0065958499908447, |
| "grad_norm_var": 0.4283940935342485, |
| "learning_rate": 0.1, |
| "loss": 2.6696, |
| "loss/crossentropy": 2.414606809616089, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2550049424171448, |
| "loss/reg": 0.17423003911972046, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.00035, |
| "grad_norm": 1.7974509000778198, |
| "grad_norm_var": 0.43062365031250915, |
| "learning_rate": 0.1, |
| "loss": 2.864, |
| "loss/crossentropy": 2.660989999771118, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20300069451332092, |
| "loss/reg": 0.17377422749996185, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.00036, |
| "grad_norm": 1.7680789232254028, |
| "grad_norm_var": 0.432530945979033, |
| "learning_rate": 0.1, |
| "loss": 2.6188, |
| "loss/crossentropy": 2.403914451599121, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21493443846702576, |
| "loss/reg": 0.17340220510959625, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.00037, |
| "grad_norm": 2.317908763885498, |
| "grad_norm_var": 0.4283199894059083, |
| "learning_rate": 0.1, |
| "loss": 2.8529, |
| "loss/crossentropy": 2.6271157264709473, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22582311928272247, |
| "loss/reg": 0.173123300075531, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.00038, |
| "grad_norm": 1.9942872524261475, |
| "grad_norm_var": 0.42817566880153485, |
| "learning_rate": 0.1, |
| "loss": 2.5906, |
| "loss/crossentropy": 2.340468168258667, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.25018030405044556, |
| "loss/reg": 0.1727283000946045, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.00039, |
| "grad_norm": 1.9727613925933838, |
| "grad_norm_var": 0.42870121726485066, |
| "learning_rate": 0.1, |
| "loss": 3.2977, |
| "loss/crossentropy": 3.083205461502075, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2144966721534729, |
| "loss/reg": 0.172413170337677, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0004, |
| "grad_norm": 1.6364719867706299, |
| "grad_norm_var": 0.4335860486092982, |
| "learning_rate": 0.1, |
| "loss": 2.7189, |
| "loss/crossentropy": 2.523791551589966, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19511213898658752, |
| "loss/reg": 0.1719983071088791, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.00041, |
| "grad_norm": 1.8482328653335571, |
| "grad_norm_var": 0.43360974225961757, |
| "learning_rate": 0.1, |
| "loss": 2.6773, |
| "loss/crossentropy": 2.462472438812256, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21487540006637573, |
| "loss/reg": 0.17160135507583618, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.00042, |
| "grad_norm": 1.7280985116958618, |
| "grad_norm_var": 0.4319019771537389, |
| "learning_rate": 0.1, |
| "loss": 2.8645, |
| "loss/crossentropy": 2.655454397201538, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20901742577552795, |
| "loss/reg": 0.17148931324481964, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.00043, |
| "grad_norm": 1.8898181915283203, |
| "grad_norm_var": 0.046839339332182324, |
| "learning_rate": 0.1, |
| "loss": 2.7555, |
| "loss/crossentropy": 2.536778688430786, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21869313716888428, |
| "loss/reg": 0.17108288407325745, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.00044, |
| "grad_norm": 1.647220253944397, |
| "grad_norm_var": 0.028917078312208785, |
| "learning_rate": 0.1, |
| "loss": 2.6615, |
| "loss/crossentropy": 2.478940725326538, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18257135152816772, |
| "loss/reg": 0.17084510624408722, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.00045, |
| "grad_norm": 1.9125159978866577, |
| "grad_norm_var": 0.028853861892116524, |
| "learning_rate": 0.1, |
| "loss": 2.598, |
| "loss/crossentropy": 2.37441349029541, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22361578047275543, |
| "loss/reg": 0.17055301368236542, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.00046, |
| "grad_norm": 3.1315228939056396, |
| "grad_norm_var": 0.1310999835447101, |
| "learning_rate": 0.1, |
| "loss": 2.9111, |
| "loss/crossentropy": 2.656038999557495, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.25508174300193787, |
| "loss/reg": 0.17023393511772156, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.00047, |
| "grad_norm": 1.7906099557876587, |
| "grad_norm_var": 0.12897839502201733, |
| "learning_rate": 0.1, |
| "loss": 2.6518, |
| "loss/crossentropy": 2.4433135986328125, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20846828818321228, |
| "loss/reg": 0.1698823720216751, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.00048, |
| "grad_norm": 1.6994481086730957, |
| "grad_norm_var": 0.130376192278175, |
| "learning_rate": 0.1, |
| "loss": 2.8731, |
| "loss/crossentropy": 2.664911985397339, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2082216739654541, |
| "loss/reg": 0.16974325478076935, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.00049, |
| "grad_norm": 1.6140257120132446, |
| "grad_norm_var": 0.1357791731520154, |
| "learning_rate": 0.1, |
| "loss": 2.8102, |
| "loss/crossentropy": 2.6029086112976074, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.207241490483284, |
| "loss/reg": 0.16932587325572968, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.0005, |
| "grad_norm": 1.7099595069885254, |
| "grad_norm_var": 0.13794038281624807, |
| "learning_rate": 0.1, |
| "loss": 2.7699, |
| "loss/crossentropy": 2.554683208465576, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2151976078748703, |
| "loss/reg": 0.1692301481962204, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.00051, |
| "grad_norm": 1.8096407651901245, |
| "grad_norm_var": 0.13777706170046333, |
| "learning_rate": 0.1, |
| "loss": 2.7015, |
| "loss/crossentropy": 2.4817771911621094, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2197084128856659, |
| "loss/reg": 0.1689380258321762, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.00052, |
| "grad_norm": 1.7712668180465698, |
| "grad_norm_var": 0.13771974789169666, |
| "learning_rate": 0.1, |
| "loss": 2.8154, |
| "loss/crossentropy": 2.594926595687866, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22043970227241516, |
| "loss/reg": 0.1687590628862381, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.00053, |
| "grad_norm": 1.7525863647460938, |
| "grad_norm_var": 0.12654128300209752, |
| "learning_rate": 0.1, |
| "loss": 2.7242, |
| "loss/crossentropy": 2.522385835647583, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20183579623699188, |
| "loss/reg": 0.16853466629981995, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.00054, |
| "grad_norm": 1.8120129108428955, |
| "grad_norm_var": 0.12557967663842623, |
| "learning_rate": 0.1, |
| "loss": 2.5661, |
| "loss/crossentropy": 2.3559939861297607, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2101481854915619, |
| "loss/reg": 0.16827252507209778, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.00055, |
| "grad_norm": 1.602097749710083, |
| "grad_norm_var": 0.12848934693259723, |
| "learning_rate": 0.1, |
| "loss": 2.7779, |
| "loss/crossentropy": 2.5753490924835205, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2025029957294464, |
| "loss/reg": 0.16798608005046844, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.00056, |
| "grad_norm": 1.7756836414337158, |
| "grad_norm_var": 0.1260207885693923, |
| "learning_rate": 0.1, |
| "loss": 2.8945, |
| "loss/crossentropy": 2.668313980102539, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22622469067573547, |
| "loss/reg": 0.16773369908332825, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.00057, |
| "grad_norm": 1.6873103380203247, |
| "grad_norm_var": 0.12753605299891219, |
| "learning_rate": 0.1, |
| "loss": 2.5983, |
| "loss/crossentropy": 2.411243438720703, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1870155930519104, |
| "loss/reg": 0.16742756962776184, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.00058, |
| "grad_norm": 1.9386037588119507, |
| "grad_norm_var": 0.12735106768899035, |
| "learning_rate": 0.1, |
| "loss": 2.5882, |
| "loss/crossentropy": 2.359182119369507, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22898568212985992, |
| "loss/reg": 0.16724736988544464, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.00059, |
| "grad_norm": 3.909019708633423, |
| "grad_norm_var": 0.3938314772090505, |
| "learning_rate": 0.1, |
| "loss": 2.6867, |
| "loss/crossentropy": 2.3856070041656494, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.30108943581581116, |
| "loss/reg": 0.1671791821718216, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0006, |
| "grad_norm": 1.9209262132644653, |
| "grad_norm_var": 0.3866348221235699, |
| "learning_rate": 0.1, |
| "loss": 2.6558, |
| "loss/crossentropy": 2.447204113006592, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20862701535224915, |
| "loss/reg": 0.16685180366039276, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.00061, |
| "grad_norm": 1.9172567129135132, |
| "grad_norm_var": 0.38658735890980034, |
| "learning_rate": 0.1, |
| "loss": 2.5699, |
| "loss/crossentropy": 2.35029673576355, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21964755654335022, |
| "loss/reg": 0.16663654148578644, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.00062, |
| "grad_norm": 1.9377732276916504, |
| "grad_norm_var": 0.2939795162853282, |
| "learning_rate": 0.1, |
| "loss": 3.0421, |
| "loss/crossentropy": 2.8235275745391846, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21854163706302643, |
| "loss/reg": 0.16646318137645721, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.00063, |
| "grad_norm": 1.656541347503662, |
| "grad_norm_var": 0.2973356744839809, |
| "learning_rate": 0.1, |
| "loss": 2.6364, |
| "loss/crossentropy": 2.4385454654693604, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1978561282157898, |
| "loss/reg": 0.16618403792381287, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.00064, |
| "grad_norm": 2.101804733276367, |
| "grad_norm_var": 0.2963119838014231, |
| "learning_rate": 0.1, |
| "loss": 2.9811, |
| "loss/crossentropy": 2.765476703643799, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21564313769340515, |
| "loss/reg": 0.1660410314798355, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.00065, |
| "grad_norm": 2.5749895572662354, |
| "grad_norm_var": 0.313250018404158, |
| "learning_rate": 0.1, |
| "loss": 2.9426, |
| "loss/crossentropy": 2.691192865371704, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2513768672943115, |
| "loss/reg": 0.16587170958518982, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.00066, |
| "grad_norm": 2.017178773880005, |
| "grad_norm_var": 0.30758188247720164, |
| "learning_rate": 0.1, |
| "loss": 2.6205, |
| "loss/crossentropy": 2.410979986190796, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20955853164196014, |
| "loss/reg": 0.16572552919387817, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.00067, |
| "grad_norm": 1.881057858467102, |
| "grad_norm_var": 0.30597808537116955, |
| "learning_rate": 0.1, |
| "loss": 2.8162, |
| "loss/crossentropy": 2.5815954208374023, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2346353977918625, |
| "loss/reg": 0.1655614972114563, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.00068, |
| "grad_norm": 1.7658600807189941, |
| "grad_norm_var": 0.30615634510237055, |
| "learning_rate": 0.1, |
| "loss": 2.7744, |
| "loss/crossentropy": 2.5518367290496826, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.222543865442276, |
| "loss/reg": 0.16535314917564392, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.00069, |
| "grad_norm": 1.8009297847747803, |
| "grad_norm_var": 0.30460663816056877, |
| "learning_rate": 0.1, |
| "loss": 2.8592, |
| "loss/crossentropy": 2.6507787704467773, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20840439200401306, |
| "loss/reg": 0.16517679393291473, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.0007, |
| "grad_norm": 1.785271406173706, |
| "grad_norm_var": 0.3053882480199533, |
| "learning_rate": 0.1, |
| "loss": 2.6, |
| "loss/crossentropy": 2.3875892162323, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21238045394420624, |
| "loss/reg": 0.16507619619369507, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.00071, |
| "grad_norm": 2.001296043395996, |
| "grad_norm_var": 0.29326341922192395, |
| "learning_rate": 0.1, |
| "loss": 2.7152, |
| "loss/crossentropy": 2.464181900024414, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2510414719581604, |
| "loss/reg": 0.16482555866241455, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.00072, |
| "grad_norm": 1.8108326196670532, |
| "grad_norm_var": 0.2920926806909326, |
| "learning_rate": 0.1, |
| "loss": 2.6657, |
| "loss/crossentropy": 2.4663877487182617, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19929474592208862, |
| "loss/reg": 0.16470520198345184, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.00073, |
| "grad_norm": 1.8082332611083984, |
| "grad_norm_var": 0.2872529776700346, |
| "learning_rate": 0.1, |
| "loss": 2.5833, |
| "loss/crossentropy": 2.361788749694824, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22149252891540527, |
| "loss/reg": 0.16450858116149902, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.00074, |
| "grad_norm": 1.898726224899292, |
| "grad_norm_var": 0.28795382414913867, |
| "learning_rate": 0.1, |
| "loss": 2.9553, |
| "loss/crossentropy": 2.7490241527557373, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20631715655326843, |
| "loss/reg": 0.16432958841323853, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.00075, |
| "grad_norm": 1.845772624015808, |
| "grad_norm_var": 0.0423884833109937, |
| "learning_rate": 0.1, |
| "loss": 2.9804, |
| "loss/crossentropy": 2.753929853439331, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22646912932395935, |
| "loss/reg": 0.16411833465099335, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.00076, |
| "grad_norm": 1.7554599046707153, |
| "grad_norm_var": 0.04408537942632377, |
| "learning_rate": 0.1, |
| "loss": 2.8329, |
| "loss/crossentropy": 2.6151506900787354, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2177816778421402, |
| "loss/reg": 0.16405650973320007, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.00077, |
| "grad_norm": 1.7131043672561646, |
| "grad_norm_var": 0.046491007479150366, |
| "learning_rate": 0.1, |
| "loss": 2.7486, |
| "loss/crossentropy": 2.5505027770996094, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19811946153640747, |
| "loss/reg": 0.16392755508422852, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.00078, |
| "grad_norm": 1.9777545928955078, |
| "grad_norm_var": 0.04680732673595423, |
| "learning_rate": 0.1, |
| "loss": 3.0103, |
| "loss/crossentropy": 2.776825428009033, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23347489535808563, |
| "loss/reg": 0.16359421610832214, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.00079, |
| "grad_norm": 2.339536666870117, |
| "grad_norm_var": 0.053821195061358115, |
| "learning_rate": 0.1, |
| "loss": 2.5372, |
| "loss/crossentropy": 2.2812371253967285, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.25598034262657166, |
| "loss/reg": 0.16351112723350525, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0008, |
| "grad_norm": 1.734583854675293, |
| "grad_norm_var": 0.054442683827191196, |
| "learning_rate": 0.1, |
| "loss": 2.4136, |
| "loss/crossentropy": 2.2008090019226074, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21282735466957092, |
| "loss/reg": 0.16335174441337585, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.00081, |
| "grad_norm": 1.7617944478988647, |
| "grad_norm_var": 0.02469138523525179, |
| "learning_rate": 0.1, |
| "loss": 2.6852, |
| "loss/crossentropy": 2.4770615100860596, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20818310976028442, |
| "loss/reg": 0.16316014528274536, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.00082, |
| "grad_norm": 1.9816601276397705, |
| "grad_norm_var": 0.024066529955945035, |
| "learning_rate": 0.1, |
| "loss": 2.7955, |
| "loss/crossentropy": 2.6095259189605713, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18594592809677124, |
| "loss/reg": 0.16319124400615692, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.00083, |
| "grad_norm": 1.8290983438491821, |
| "grad_norm_var": 0.024133490394293015, |
| "learning_rate": 0.1, |
| "loss": 2.6438, |
| "loss/crossentropy": 2.417361259460449, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22647587954998016, |
| "loss/reg": 0.1630745381116867, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.00084, |
| "grad_norm": 1.8714231252670288, |
| "grad_norm_var": 0.023461027211922135, |
| "learning_rate": 0.1, |
| "loss": 2.6158, |
| "loss/crossentropy": 2.3823697566986084, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2334725707769394, |
| "loss/reg": 0.16289766132831573, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.00085, |
| "grad_norm": 1.6547683477401733, |
| "grad_norm_var": 0.026136770068175254, |
| "learning_rate": 0.1, |
| "loss": 2.6022, |
| "loss/crossentropy": 2.408360242843628, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19381171464920044, |
| "loss/reg": 0.16268208622932434, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.00086, |
| "grad_norm": 1.8692580461502075, |
| "grad_norm_var": 0.02573428253156127, |
| "learning_rate": 0.1, |
| "loss": 2.6977, |
| "loss/crossentropy": 2.4621360301971436, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2355971336364746, |
| "loss/reg": 0.1626405119895935, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.00087, |
| "grad_norm": 1.7254503965377808, |
| "grad_norm_var": 0.025507648334505008, |
| "learning_rate": 0.1, |
| "loss": 2.5622, |
| "loss/crossentropy": 2.3580234050750732, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20416195690631866, |
| "loss/reg": 0.16234871745109558, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.00088, |
| "grad_norm": 1.7793629169464111, |
| "grad_norm_var": 0.02572797763801109, |
| "learning_rate": 0.1, |
| "loss": 2.9687, |
| "loss/crossentropy": 2.767064332962036, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20166558027267456, |
| "loss/reg": 0.16240248084068298, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.00089, |
| "grad_norm": 1.721548080444336, |
| "grad_norm_var": 0.02664134610365251, |
| "learning_rate": 0.1, |
| "loss": 2.7699, |
| "loss/crossentropy": 2.5585005283355713, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21136051416397095, |
| "loss/reg": 0.16210931539535522, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.0009, |
| "grad_norm": 1.8082941770553589, |
| "grad_norm_var": 0.0264589166504661, |
| "learning_rate": 0.1, |
| "loss": 2.8145, |
| "loss/crossentropy": 2.607401132583618, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20708514750003815, |
| "loss/reg": 0.16195623576641083, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.00091, |
| "grad_norm": 1.915407419204712, |
| "grad_norm_var": 0.02685685218971082, |
| "learning_rate": 0.1, |
| "loss": 2.7644, |
| "loss/crossentropy": 2.5465660095214844, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21782971918582916, |
| "loss/reg": 0.1619657576084137, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.00092, |
| "grad_norm": 1.943249225616455, |
| "grad_norm_var": 0.026946480224736055, |
| "learning_rate": 0.1, |
| "loss": 2.8884, |
| "loss/crossentropy": 2.6643056869506836, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22411349415779114, |
| "loss/reg": 0.16182415187358856, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.00093, |
| "grad_norm": 1.778732180595398, |
| "grad_norm_var": 0.02600339998267387, |
| "learning_rate": 0.1, |
| "loss": 2.9448, |
| "loss/crossentropy": 2.7302517890930176, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2145233452320099, |
| "loss/reg": 0.16179771721363068, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.00094, |
| "grad_norm": 2.3324451446533203, |
| "grad_norm_var": 0.03963631758308731, |
| "learning_rate": 0.1, |
| "loss": 2.5716, |
| "loss/crossentropy": 2.347743511199951, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22382411360740662, |
| "loss/reg": 0.16151709854602814, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.00095, |
| "grad_norm": 1.8147391080856323, |
| "grad_norm_var": 0.024548420664558, |
| "learning_rate": 0.1, |
| "loss": 2.6233, |
| "loss/crossentropy": 2.421243190765381, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20203803479671478, |
| "loss/reg": 0.1616181880235672, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.00096, |
| "grad_norm": 2.151325225830078, |
| "grad_norm_var": 0.029261373196793214, |
| "learning_rate": 0.1, |
| "loss": 2.8669, |
| "loss/crossentropy": 2.6277081966400146, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2391999363899231, |
| "loss/reg": 0.16138805449008942, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.00097, |
| "grad_norm": 1.7476903200149536, |
| "grad_norm_var": 0.029479473096021933, |
| "learning_rate": 0.1, |
| "loss": 2.7742, |
| "loss/crossentropy": 2.572164297103882, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20205846428871155, |
| "loss/reg": 0.16124150156974792, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.00098, |
| "grad_norm": 1.7204608917236328, |
| "grad_norm_var": 0.029864490374853798, |
| "learning_rate": 0.1, |
| "loss": 2.6682, |
| "loss/crossentropy": 2.4673049449920654, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2008523941040039, |
| "loss/reg": 0.16115766763687134, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.00099, |
| "grad_norm": 1.7873692512512207, |
| "grad_norm_var": 0.03011161271887962, |
| "learning_rate": 0.1, |
| "loss": 2.7796, |
| "loss/crossentropy": 2.5443999767303467, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23517972230911255, |
| "loss/reg": 0.16098013520240784, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.001, |
| "grad_norm": 1.6834875345230103, |
| "grad_norm_var": 0.03181598615698225, |
| "learning_rate": 0.1, |
| "loss": 2.757, |
| "loss/crossentropy": 2.5468289852142334, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21012938022613525, |
| "loss/reg": 0.16100959479808807, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.00101, |
| "grad_norm": 1.9399726390838623, |
| "grad_norm_var": 0.029871219645123345, |
| "learning_rate": 0.1, |
| "loss": 2.5498, |
| "loss/crossentropy": 2.320744514465332, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22902999818325043, |
| "loss/reg": 0.1607155203819275, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.00102, |
| "grad_norm": 1.6978514194488525, |
| "grad_norm_var": 0.03143703849124388, |
| "learning_rate": 0.1, |
| "loss": 2.391, |
| "loss/crossentropy": 2.193343162536621, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1976414918899536, |
| "loss/reg": 0.16065587103366852, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.00103, |
| "grad_norm": 1.6977367401123047, |
| "grad_norm_var": 0.03193312033378574, |
| "learning_rate": 0.1, |
| "loss": 2.6103, |
| "loss/crossentropy": 2.418210983276367, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19208090007305145, |
| "loss/reg": 0.16060210764408112, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.00104, |
| "grad_norm": 1.8626060485839844, |
| "grad_norm_var": 0.031637924847175175, |
| "learning_rate": 0.1, |
| "loss": 2.7385, |
| "loss/crossentropy": 2.522723913192749, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21581590175628662, |
| "loss/reg": 0.16052451729774475, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.00105, |
| "grad_norm": 1.7438913583755493, |
| "grad_norm_var": 0.03128591180344884, |
| "learning_rate": 0.1, |
| "loss": 2.839, |
| "loss/crossentropy": 2.6107664108276367, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2282416969537735, |
| "loss/reg": 0.16041938960552216, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.00106, |
| "grad_norm": 2.114018678665161, |
| "grad_norm_var": 0.03536321148732349, |
| "learning_rate": 0.1, |
| "loss": 2.4629, |
| "loss/crossentropy": 2.232969045639038, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22997362911701202, |
| "loss/reg": 0.16031785309314728, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.00107, |
| "grad_norm": 1.8013360500335693, |
| "grad_norm_var": 0.03549629451111868, |
| "learning_rate": 0.1, |
| "loss": 2.8945, |
| "loss/crossentropy": 2.6938414573669434, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20061947405338287, |
| "loss/reg": 0.1603301614522934, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.00108, |
| "grad_norm": 1.7160793542861938, |
| "grad_norm_var": 0.03630785554785968, |
| "learning_rate": 0.1, |
| "loss": 2.6298, |
| "loss/crossentropy": 2.4277350902557373, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20209258794784546, |
| "loss/reg": 0.16006989777088165, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.00109, |
| "grad_norm": 1.6853837966918945, |
| "grad_norm_var": 0.03773152725828523, |
| "learning_rate": 0.1, |
| "loss": 2.8953, |
| "loss/crossentropy": 2.688405990600586, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20690396428108215, |
| "loss/reg": 0.16008752584457397, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.0011, |
| "grad_norm": 1.7110825777053833, |
| "grad_norm_var": 0.02135598541223184, |
| "learning_rate": 0.1, |
| "loss": 2.8252, |
| "loss/crossentropy": 2.608750820159912, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21645289659500122, |
| "loss/reg": 0.15988831222057343, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.00111, |
| "grad_norm": 1.837937831878662, |
| "grad_norm_var": 0.021420706983363343, |
| "learning_rate": 0.1, |
| "loss": 2.5936, |
| "loss/crossentropy": 2.3865153789520264, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2070549726486206, |
| "loss/reg": 0.15985874831676483, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.00112, |
| "grad_norm": 1.7919666767120361, |
| "grad_norm_var": 0.012952468361958758, |
| "learning_rate": 0.1, |
| "loss": 2.7372, |
| "loss/crossentropy": 2.5049924850463867, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23224014043807983, |
| "loss/reg": 0.15970398485660553, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.00113, |
| "grad_norm": 1.8749711513519287, |
| "grad_norm_var": 0.013354230503546205, |
| "learning_rate": 0.1, |
| "loss": 2.8534, |
| "loss/crossentropy": 2.6308281421661377, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22260984778404236, |
| "loss/reg": 0.1596403270959854, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.00114, |
| "grad_norm": 1.730081558227539, |
| "grad_norm_var": 0.013268716990129785, |
| "learning_rate": 0.1, |
| "loss": 2.8143, |
| "loss/crossentropy": 2.5993852615356445, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21487459540367126, |
| "loss/reg": 0.15961258113384247, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.00115, |
| "grad_norm": 1.6594314575195312, |
| "grad_norm_var": 0.014374737191151137, |
| "learning_rate": 0.1, |
| "loss": 2.8622, |
| "loss/crossentropy": 2.663595199584961, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19859746098518372, |
| "loss/reg": 0.1594453752040863, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.00116, |
| "grad_norm": 1.8340096473693848, |
| "grad_norm_var": 0.013768737078364702, |
| "learning_rate": 0.1, |
| "loss": 2.9001, |
| "loss/crossentropy": 2.6800649166107178, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2200334221124649, |
| "loss/reg": 0.15948700904846191, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.00117, |
| "grad_norm": 9.800415992736816, |
| "grad_norm_var": 4.028786937350557, |
| "learning_rate": 0.1, |
| "loss": 3.4834, |
| "loss/crossentropy": 3.0538530349731445, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.42959335446357727, |
| "loss/reg": 0.1592700481414795, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.00118, |
| "grad_norm": 1.6741610765457153, |
| "grad_norm_var": 4.030676411030317, |
| "learning_rate": 0.1, |
| "loss": 2.8793, |
| "loss/crossentropy": 2.6746108531951904, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20465821027755737, |
| "loss/reg": 0.15919889509677887, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.00119, |
| "grad_norm": 2.479275703430176, |
| "grad_norm_var": 4.007817829629853, |
| "learning_rate": 0.1, |
| "loss": 2.746, |
| "loss/crossentropy": 2.5126523971557617, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23332132399082184, |
| "loss/reg": 0.15911614894866943, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0012, |
| "grad_norm": 1.7633051872253418, |
| "grad_norm_var": 4.014652797819997, |
| "learning_rate": 0.1, |
| "loss": 2.8031, |
| "loss/crossentropy": 2.593297004699707, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2098241001367569, |
| "loss/reg": 0.15906544029712677, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.00121, |
| "grad_norm": 1.7128862142562866, |
| "grad_norm_var": 4.017119676881966, |
| "learning_rate": 0.1, |
| "loss": 2.8798, |
| "loss/crossentropy": 2.6694579124450684, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21036633849143982, |
| "loss/reg": 0.15902374684810638, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.00122, |
| "grad_norm": 1.6873313188552856, |
| "grad_norm_var": 4.0404530726448415, |
| "learning_rate": 0.1, |
| "loss": 2.7747, |
| "loss/crossentropy": 2.570366621017456, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20433162152767181, |
| "loss/reg": 0.1589815467596054, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.00123, |
| "grad_norm": 1.7031251192092896, |
| "grad_norm_var": 4.047552790574258, |
| "learning_rate": 0.1, |
| "loss": 2.6301, |
| "loss/crossentropy": 2.4278907775878906, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2022581398487091, |
| "loss/reg": 0.15884126722812653, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.00124, |
| "grad_norm": 1.9169403314590454, |
| "grad_norm_var": 4.0346680314699315, |
| "learning_rate": 0.1, |
| "loss": 2.3787, |
| "loss/crossentropy": 2.1689770221710205, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2097214311361313, |
| "loss/reg": 0.15869207680225372, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.00125, |
| "grad_norm": 3.1283042430877686, |
| "grad_norm_var": 4.045799422662404, |
| "learning_rate": 0.1, |
| "loss": 3.0155, |
| "loss/crossentropy": 2.746854543685913, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.26862430572509766, |
| "loss/reg": 0.15870903432369232, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.00126, |
| "grad_norm": 1.9831962585449219, |
| "grad_norm_var": 4.025647018526637, |
| "learning_rate": 0.1, |
| "loss": 2.4818, |
| "loss/crossentropy": 2.2390859127044678, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24275356531143188, |
| "loss/reg": 0.1585802137851715, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.00127, |
| "grad_norm": 2.1434175968170166, |
| "grad_norm_var": 4.008134789661286, |
| "learning_rate": 0.1, |
| "loss": 2.7133, |
| "loss/crossentropy": 2.454806089401245, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.258467435836792, |
| "loss/reg": 0.15847015380859375, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.00128, |
| "grad_norm": 1.8749781847000122, |
| "grad_norm_var": 4.001501640008951, |
| "learning_rate": 0.1, |
| "loss": 2.533, |
| "loss/crossentropy": 2.3184025287628174, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2146439403295517, |
| "loss/reg": 0.15848059952259064, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.00129, |
| "grad_norm": 1.8413902521133423, |
| "grad_norm_var": 4.004081254550272, |
| "learning_rate": 0.1, |
| "loss": 2.864, |
| "loss/crossentropy": 2.64398193359375, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22002694010734558, |
| "loss/reg": 0.15835124254226685, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.0013, |
| "grad_norm": 1.6885493993759155, |
| "grad_norm_var": 4.008083029094881, |
| "learning_rate": 0.1, |
| "loss": 2.4168, |
| "loss/crossentropy": 2.2391769886016846, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17763087153434753, |
| "loss/reg": 0.15839768946170807, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.00131, |
| "grad_norm": 2.158484935760498, |
| "grad_norm_var": 3.9723303655756923, |
| "learning_rate": 0.1, |
| "loss": 2.8241, |
| "loss/crossentropy": 2.5652995109558105, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.25875556468963623, |
| "loss/reg": 0.15823814272880554, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.00132, |
| "grad_norm": 1.7983602285385132, |
| "grad_norm_var": 3.9753941324049613, |
| "learning_rate": 0.1, |
| "loss": 2.8251, |
| "loss/crossentropy": 2.6149044036865234, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21023395657539368, |
| "loss/reg": 0.15798808634281158, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.00133, |
| "grad_norm": 1.9443055391311646, |
| "grad_norm_var": 0.14346460767656594, |
| "learning_rate": 0.1, |
| "loss": 2.9435, |
| "loss/crossentropy": 2.7223899364471436, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22111660242080688, |
| "loss/reg": 0.15817613899707794, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.00134, |
| "grad_norm": 1.899189829826355, |
| "grad_norm_var": 0.13779441057120395, |
| "learning_rate": 0.1, |
| "loss": 2.7795, |
| "loss/crossentropy": 2.5600945949554443, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21940697729587555, |
| "loss/reg": 0.15794193744659424, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.00135, |
| "grad_norm": 1.630270004272461, |
| "grad_norm_var": 0.12663120134069245, |
| "learning_rate": 0.1, |
| "loss": 2.7035, |
| "loss/crossentropy": 2.5131657123565674, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1903446763753891, |
| "loss/reg": 0.1579643189907074, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.00136, |
| "grad_norm": 2.495762348175049, |
| "grad_norm_var": 0.1439188814578981, |
| "learning_rate": 0.1, |
| "loss": 2.7466, |
| "loss/crossentropy": 2.509981870651245, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2366005778312683, |
| "loss/reg": 0.15786632895469666, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.00137, |
| "grad_norm": 1.8029515743255615, |
| "grad_norm_var": 0.14127334497109126, |
| "learning_rate": 0.1, |
| "loss": 2.6665, |
| "loss/crossentropy": 2.450725555419922, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2158118486404419, |
| "loss/reg": 0.15775997936725616, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.00138, |
| "grad_norm": 1.7371032238006592, |
| "grad_norm_var": 0.13947908157823363, |
| "learning_rate": 0.1, |
| "loss": 2.6852, |
| "loss/crossentropy": 2.4715776443481445, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21362335979938507, |
| "loss/reg": 0.1578553318977356, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.00139, |
| "grad_norm": 1.8639756441116333, |
| "grad_norm_var": 0.13506916575409847, |
| "learning_rate": 0.1, |
| "loss": 2.9065, |
| "loss/crossentropy": 2.694209098815918, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21229249238967896, |
| "loss/reg": 0.1577604115009308, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.0014, |
| "grad_norm": 1.816977858543396, |
| "grad_norm_var": 0.1367234220915094, |
| "learning_rate": 0.1, |
| "loss": 2.6881, |
| "loss/crossentropy": 2.4862794876098633, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2017817199230194, |
| "loss/reg": 0.15767979621887207, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.00141, |
| "grad_norm": 1.739311695098877, |
| "grad_norm_var": 0.04611241615331186, |
| "learning_rate": 0.1, |
| "loss": 2.63, |
| "loss/crossentropy": 2.4266629219055176, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20337159931659698, |
| "loss/reg": 0.15773198008537292, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.00142, |
| "grad_norm": 1.773036241531372, |
| "grad_norm_var": 0.04657351522375753, |
| "learning_rate": 0.1, |
| "loss": 2.794, |
| "loss/crossentropy": 2.592991352081299, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20103268325328827, |
| "loss/reg": 0.15748059749603271, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.00143, |
| "grad_norm": 1.8834407329559326, |
| "grad_norm_var": 0.041944214419125575, |
| "learning_rate": 0.1, |
| "loss": 2.6798, |
| "loss/crossentropy": 2.446930170059204, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23286782205104828, |
| "loss/reg": 0.15748316049575806, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.00144, |
| "grad_norm": 1.8624577522277832, |
| "grad_norm_var": 0.04194863204302563, |
| "learning_rate": 0.1, |
| "loss": 2.9222, |
| "loss/crossentropy": 2.709848642349243, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21236401796340942, |
| "loss/reg": 0.157439187169075, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.00145, |
| "grad_norm": 2.198138475418091, |
| "grad_norm_var": 0.048495819470673496, |
| "learning_rate": 0.1, |
| "loss": 3.0113, |
| "loss/crossentropy": 2.766000270843506, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24530062079429626, |
| "loss/reg": 0.1571916937828064, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.00146, |
| "grad_norm": 1.8742032051086426, |
| "grad_norm_var": 0.04558241378375474, |
| "learning_rate": 0.1, |
| "loss": 2.8061, |
| "loss/crossentropy": 2.6045267581939697, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20158621668815613, |
| "loss/reg": 0.15721361339092255, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.00147, |
| "grad_norm": 1.9563077688217163, |
| "grad_norm_var": 0.041300535692287595, |
| "learning_rate": 0.1, |
| "loss": 2.827, |
| "loss/crossentropy": 2.5931785106658936, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23382678627967834, |
| "loss/reg": 0.15716728568077087, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.00148, |
| "grad_norm": 1.9061962366104126, |
| "grad_norm_var": 0.04067755053454955, |
| "learning_rate": 0.1, |
| "loss": 2.5933, |
| "loss/crossentropy": 2.3885960578918457, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20471733808517456, |
| "loss/reg": 0.15726977586746216, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.00149, |
| "grad_norm": 1.9156067371368408, |
| "grad_norm_var": 0.040555575967018774, |
| "learning_rate": 0.1, |
| "loss": 2.9642, |
| "loss/crossentropy": 2.7354202270507812, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2287421077489853, |
| "loss/reg": 0.15707099437713623, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.0015, |
| "grad_norm": 1.8910969495773315, |
| "grad_norm_var": 0.04055750400486927, |
| "learning_rate": 0.1, |
| "loss": 2.739, |
| "loss/crossentropy": 2.5182340145111084, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22076836228370667, |
| "loss/reg": 0.15697073936462402, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.00151, |
| "grad_norm": 1.6989645957946777, |
| "grad_norm_var": 0.03841233967358916, |
| "learning_rate": 0.1, |
| "loss": 2.8063, |
| "loss/crossentropy": 2.612990617752075, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19335077702999115, |
| "loss/reg": 0.156888946890831, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.00152, |
| "grad_norm": 2.2469356060028076, |
| "grad_norm_var": 0.022548668654939222, |
| "learning_rate": 0.1, |
| "loss": 2.7743, |
| "loss/crossentropy": 2.5491178035736084, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22520646452903748, |
| "loss/reg": 0.15691237151622772, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.00153, |
| "grad_norm": 2.0647037029266357, |
| "grad_norm_var": 0.023952667497254214, |
| "learning_rate": 0.1, |
| "loss": 2.8908, |
| "loss/crossentropy": 2.6590237617492676, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23176053166389465, |
| "loss/reg": 0.1568426787853241, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.00154, |
| "grad_norm": 1.982617735862732, |
| "grad_norm_var": 0.02232931325879098, |
| "learning_rate": 0.1, |
| "loss": 2.8867, |
| "loss/crossentropy": 2.652101755142212, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23458367586135864, |
| "loss/reg": 0.15686176717281342, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.00155, |
| "grad_norm": 1.7477375268936157, |
| "grad_norm_var": 0.023997472158968237, |
| "learning_rate": 0.1, |
| "loss": 2.7266, |
| "loss/crossentropy": 2.513819694519043, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21280211210250854, |
| "loss/reg": 0.15666137635707855, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.00156, |
| "grad_norm": 1.746482491493225, |
| "grad_norm_var": 0.025181090744457416, |
| "learning_rate": 0.1, |
| "loss": 2.595, |
| "loss/crossentropy": 2.3864552974700928, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2085363268852234, |
| "loss/reg": 0.1566024124622345, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.00157, |
| "grad_norm": 1.7721116542816162, |
| "grad_norm_var": 0.024521743057247393, |
| "learning_rate": 0.1, |
| "loss": 2.801, |
| "loss/crossentropy": 2.59161639213562, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2094038426876068, |
| "loss/reg": 0.1567317098379135, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.00158, |
| "grad_norm": 1.7272623777389526, |
| "grad_norm_var": 0.025473367009851165, |
| "learning_rate": 0.1, |
| "loss": 2.8403, |
| "loss/crossentropy": 2.6272571086883545, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2130330204963684, |
| "loss/reg": 0.15663808584213257, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.00159, |
| "grad_norm": 1.7689458131790161, |
| "grad_norm_var": 0.02661633517824171, |
| "learning_rate": 0.1, |
| "loss": 2.7816, |
| "loss/crossentropy": 2.5743751525878906, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20725733041763306, |
| "loss/reg": 0.1564728170633316, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.0016, |
| "grad_norm": 1.8344650268554688, |
| "grad_norm_var": 0.02679604615027126, |
| "learning_rate": 0.1, |
| "loss": 2.6906, |
| "loss/crossentropy": 2.4688560962677, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2217807173728943, |
| "loss/reg": 0.15650171041488647, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.00161, |
| "grad_norm": 2.190237283706665, |
| "grad_norm_var": 0.02648136928819606, |
| "learning_rate": 0.1, |
| "loss": 2.785, |
| "loss/crossentropy": 2.548779010772705, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23622919619083405, |
| "loss/reg": 0.156360924243927, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.00162, |
| "grad_norm": 1.8707976341247559, |
| "grad_norm_var": 0.026491647449358644, |
| "learning_rate": 0.1, |
| "loss": 2.7825, |
| "loss/crossentropy": 2.5638697147369385, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21858900785446167, |
| "loss/reg": 0.15635156631469727, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.00163, |
| "grad_norm": 1.9453237056732178, |
| "grad_norm_var": 0.02640944320711795, |
| "learning_rate": 0.1, |
| "loss": 2.6527, |
| "loss/crossentropy": 2.4083635807037354, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24432258307933807, |
| "loss/reg": 0.15627038478851318, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.00164, |
| "grad_norm": 1.8414756059646606, |
| "grad_norm_var": 0.026568952606560416, |
| "learning_rate": 0.1, |
| "loss": 2.77, |
| "loss/crossentropy": 2.567917823791504, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20211508870124817, |
| "loss/reg": 0.156290665268898, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.00165, |
| "grad_norm": 1.8225215673446655, |
| "grad_norm_var": 0.026796387157016235, |
| "learning_rate": 0.1, |
| "loss": 2.8676, |
| "loss/crossentropy": 2.6358389854431152, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2317136526107788, |
| "loss/reg": 0.15617597103118896, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.00166, |
| "grad_norm": 1.8607885837554932, |
| "grad_norm_var": 0.026827059432316898, |
| "learning_rate": 0.1, |
| "loss": 3.0376, |
| "loss/crossentropy": 2.8110339641571045, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22655826807022095, |
| "loss/reg": 0.15617407858371735, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.00167, |
| "grad_norm": 2.158717393875122, |
| "grad_norm_var": 0.028781808053518827, |
| "learning_rate": 0.1, |
| "loss": 2.586, |
| "loss/crossentropy": 2.366919994354248, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21912360191345215, |
| "loss/reg": 0.1561058908700943, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.00168, |
| "grad_norm": 1.6196560859680176, |
| "grad_norm_var": 0.025304329377889623, |
| "learning_rate": 0.1, |
| "loss": 2.52, |
| "loss/crossentropy": 2.3246798515319824, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19532260298728943, |
| "loss/reg": 0.1559765487909317, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.00169, |
| "grad_norm": 1.8778904676437378, |
| "grad_norm_var": 0.022688452465346388, |
| "learning_rate": 0.1, |
| "loss": 2.6975, |
| "loss/crossentropy": 2.484034299850464, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2134588062763214, |
| "loss/reg": 0.15602269768714905, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.0017, |
| "grad_norm": 1.7405486106872559, |
| "grad_norm_var": 0.02240738120180333, |
| "learning_rate": 0.1, |
| "loss": 2.711, |
| "loss/crossentropy": 2.5013461112976074, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20963236689567566, |
| "loss/reg": 0.1559809446334839, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.00171, |
| "grad_norm": 1.9269903898239136, |
| "grad_norm_var": 0.022083583420706945, |
| "learning_rate": 0.1, |
| "loss": 2.7673, |
| "loss/crossentropy": 2.5454654693603516, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22185717523097992, |
| "loss/reg": 0.1558973789215088, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.00172, |
| "grad_norm": 1.779687762260437, |
| "grad_norm_var": 0.021665347733998353, |
| "learning_rate": 0.1, |
| "loss": 2.8069, |
| "loss/crossentropy": 2.574251890182495, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2326408177614212, |
| "loss/reg": 0.15579788386821747, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.00173, |
| "grad_norm": 1.764530062675476, |
| "grad_norm_var": 0.021756358134594696, |
| "learning_rate": 0.1, |
| "loss": 2.7154, |
| "loss/crossentropy": 2.5051937103271484, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21020236611366272, |
| "loss/reg": 0.15591895580291748, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.00174, |
| "grad_norm": 2.020695686340332, |
| "grad_norm_var": 0.022018270561008475, |
| "learning_rate": 0.1, |
| "loss": 2.7555, |
| "loss/crossentropy": 2.5307304859161377, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22480268776416779, |
| "loss/reg": 0.15572616457939148, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.00175, |
| "grad_norm": 1.6638091802597046, |
| "grad_norm_var": 0.024216207433312733, |
| "learning_rate": 0.1, |
| "loss": 2.9335, |
| "loss/crossentropy": 2.732165575027466, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20137354731559753, |
| "loss/reg": 0.15584979951381683, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.00176, |
| "grad_norm": 1.8311518430709839, |
| "grad_norm_var": 0.024232539869292916, |
| "learning_rate": 0.1, |
| "loss": 2.544, |
| "loss/crossentropy": 2.3460371494293213, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1979653686285019, |
| "loss/reg": 0.15576419234275818, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.00177, |
| "grad_norm": 2.1571810245513916, |
| "grad_norm_var": 0.0228879620020012, |
| "learning_rate": 0.1, |
| "loss": 2.6406, |
| "loss/crossentropy": 2.4037833213806152, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23679789900779724, |
| "loss/reg": 0.15563981235027313, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.00178, |
| "grad_norm": 1.7307029962539673, |
| "grad_norm_var": 0.02405508254050274, |
| "learning_rate": 0.1, |
| "loss": 2.7375, |
| "loss/crossentropy": 2.519805669784546, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21766524016857147, |
| "loss/reg": 0.1556066870689392, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.00179, |
| "grad_norm": 1.9073370695114136, |
| "grad_norm_var": 0.023707312178814277, |
| "learning_rate": 0.1, |
| "loss": 2.782, |
| "loss/crossentropy": 2.548213481903076, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2337910532951355, |
| "loss/reg": 0.15549595654010773, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.0018, |
| "grad_norm": 1.668044090270996, |
| "grad_norm_var": 0.025934188741468193, |
| "learning_rate": 0.1, |
| "loss": 2.5973, |
| "loss/crossentropy": 2.3964524269104004, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2007993459701538, |
| "loss/reg": 0.1554734855890274, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.00181, |
| "grad_norm": 1.7559475898742676, |
| "grad_norm_var": 0.02641641322401919, |
| "learning_rate": 0.1, |
| "loss": 2.711, |
| "loss/crossentropy": 2.488459825515747, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22252880036830902, |
| "loss/reg": 0.15542587637901306, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.00182, |
| "grad_norm": 1.9459789991378784, |
| "grad_norm_var": 0.02708932281542307, |
| "learning_rate": 0.1, |
| "loss": 2.8267, |
| "loss/crossentropy": 2.6021034717559814, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22461587190628052, |
| "loss/reg": 0.1552930772304535, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.00183, |
| "grad_norm": 1.929276704788208, |
| "grad_norm_var": 0.02083743901722522, |
| "learning_rate": 0.1, |
| "loss": 2.7984, |
| "loss/crossentropy": 2.554861545562744, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24350666999816895, |
| "loss/reg": 0.155385360121727, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.00184, |
| "grad_norm": 1.7980250120162964, |
| "grad_norm_var": 0.017764790135069576, |
| "learning_rate": 0.1, |
| "loss": 2.6926, |
| "loss/crossentropy": 2.483315944671631, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20924511551856995, |
| "loss/reg": 0.1552865207195282, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.00185, |
| "grad_norm": 1.7116559743881226, |
| "grad_norm_var": 0.018732148417778188, |
| "learning_rate": 0.1, |
| "loss": 2.7532, |
| "loss/crossentropy": 2.5443434715270996, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20890448987483978, |
| "loss/reg": 0.15541072189807892, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.00186, |
| "grad_norm": 1.7394353151321411, |
| "grad_norm_var": 0.018745982366965976, |
| "learning_rate": 0.1, |
| "loss": 2.854, |
| "loss/crossentropy": 2.6455330848693848, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20842963457107544, |
| "loss/reg": 0.15537504851818085, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.00187, |
| "grad_norm": 1.8237719535827637, |
| "grad_norm_var": 0.018120428526477165, |
| "learning_rate": 0.1, |
| "loss": 2.6766, |
| "loss/crossentropy": 2.465033769607544, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21156403422355652, |
| "loss/reg": 0.15524177253246307, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.00188, |
| "grad_norm": 1.8015027046203613, |
| "grad_norm_var": 0.018013423507288864, |
| "learning_rate": 0.1, |
| "loss": 2.8027, |
| "loss/crossentropy": 2.5790135860443115, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22372035682201385, |
| "loss/reg": 0.15516634285449982, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.00189, |
| "grad_norm": 1.7554924488067627, |
| "grad_norm_var": 0.018095089442842275, |
| "learning_rate": 0.1, |
| "loss": 2.798, |
| "loss/crossentropy": 2.5742616653442383, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22375284135341644, |
| "loss/reg": 0.155086487531662, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.0019, |
| "grad_norm": 1.7152998447418213, |
| "grad_norm_var": 0.01605745383508198, |
| "learning_rate": 0.1, |
| "loss": 3.0122, |
| "loss/crossentropy": 2.7884738445281982, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2237313687801361, |
| "loss/reg": 0.15506187081336975, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.00191, |
| "grad_norm": 1.7042826414108276, |
| "grad_norm_var": 0.015379484604689726, |
| "learning_rate": 0.1, |
| "loss": 2.8972, |
| "loss/crossentropy": 2.697880268096924, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19933123886585236, |
| "loss/reg": 0.15499132871627808, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.00192, |
| "grad_norm": 2.1700685024261475, |
| "grad_norm_var": 0.023471736172786882, |
| "learning_rate": 0.1, |
| "loss": 2.7465, |
| "loss/crossentropy": 2.4999959468841553, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2465011477470398, |
| "loss/reg": 0.1550794541835785, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.00193, |
| "grad_norm": 1.9875377416610718, |
| "grad_norm_var": 0.017917941796167725, |
| "learning_rate": 0.1, |
| "loss": 2.8084, |
| "loss/crossentropy": 2.592473268508911, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21596524119377136, |
| "loss/reg": 0.15495066344738007, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.00194, |
| "grad_norm": 1.7647018432617188, |
| "grad_norm_var": 0.017578485890829103, |
| "learning_rate": 0.1, |
| "loss": 2.6997, |
| "loss/crossentropy": 2.484449863433838, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2152816355228424, |
| "loss/reg": 0.15510164201259613, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.00195, |
| "grad_norm": 1.7545061111450195, |
| "grad_norm_var": 0.0173329343146681, |
| "learning_rate": 0.1, |
| "loss": 2.5521, |
| "loss/crossentropy": 2.318634033203125, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23342692852020264, |
| "loss/reg": 0.15488843619823456, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.00196, |
| "grad_norm": 1.692375898361206, |
| "grad_norm_var": 0.016896110742925192, |
| "learning_rate": 0.1, |
| "loss": 2.7914, |
| "loss/crossentropy": 2.5870914459228516, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20435144007205963, |
| "loss/reg": 0.1546718031167984, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.00197, |
| "grad_norm": 2.025480270385742, |
| "grad_norm_var": 0.019292250110413297, |
| "learning_rate": 0.1, |
| "loss": 2.6882, |
| "loss/crossentropy": 2.4381253719329834, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2501007616519928, |
| "loss/reg": 0.15473034977912903, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.00198, |
| "grad_norm": 1.7170000076293945, |
| "grad_norm_var": 0.01910347680724076, |
| "learning_rate": 0.1, |
| "loss": 2.8105, |
| "loss/crossentropy": 2.588508129119873, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22198833525180817, |
| "loss/reg": 0.15471874177455902, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.00199, |
| "grad_norm": 1.533130168914795, |
| "grad_norm_var": 0.023042113286929046, |
| "learning_rate": 0.1, |
| "loss": 2.5664, |
| "loss/crossentropy": 2.3867835998535156, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1796259582042694, |
| "loss/reg": 0.15475012362003326, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.002, |
| "grad_norm": 2.1506989002227783, |
| "grad_norm_var": 0.031033668077444076, |
| "learning_rate": 0.1, |
| "loss": 3.042, |
| "loss/crossentropy": 2.7968554496765137, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24515381455421448, |
| "loss/reg": 0.15457867085933685, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.00201, |
| "grad_norm": 1.767865777015686, |
| "grad_norm_var": 0.030453362189312116, |
| "learning_rate": 0.1, |
| "loss": 2.7452, |
| "loss/crossentropy": 2.5487236976623535, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1965104341506958, |
| "loss/reg": 0.15463702380657196, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.00202, |
| "grad_norm": 1.7154775857925415, |
| "grad_norm_var": 0.030743224372249026, |
| "learning_rate": 0.1, |
| "loss": 2.5319, |
| "loss/crossentropy": 2.336526870727539, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19532932341098785, |
| "loss/reg": 0.1546226292848587, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.00203, |
| "grad_norm": 1.6919760704040527, |
| "grad_norm_var": 0.03171775637413413, |
| "learning_rate": 0.1, |
| "loss": 2.7155, |
| "loss/crossentropy": 2.511000394821167, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20454420149326324, |
| "loss/reg": 0.15456627309322357, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.00204, |
| "grad_norm": 1.767663598060608, |
| "grad_norm_var": 0.03182410889101603, |
| "learning_rate": 0.1, |
| "loss": 2.7485, |
| "loss/crossentropy": 2.5229337215423584, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2255423665046692, |
| "loss/reg": 0.15459197759628296, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.00205, |
| "grad_norm": 1.6704747676849365, |
| "grad_norm_var": 0.03286083634256247, |
| "learning_rate": 0.1, |
| "loss": 2.5001, |
| "loss/crossentropy": 2.2876498699188232, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21244969964027405, |
| "loss/reg": 0.15454533696174622, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.00206, |
| "grad_norm": 1.6478105783462524, |
| "grad_norm_var": 0.0339237426337488, |
| "learning_rate": 0.1, |
| "loss": 2.7097, |
| "loss/crossentropy": 2.49638032913208, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21330426633358002, |
| "loss/reg": 0.15437744557857513, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.00207, |
| "grad_norm": 1.7262388467788696, |
| "grad_norm_var": 0.03368078685438339, |
| "learning_rate": 0.1, |
| "loss": 2.8699, |
| "loss/crossentropy": 2.6611640453338623, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20871958136558533, |
| "loss/reg": 0.15437661111354828, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.00208, |
| "grad_norm": 1.829162836074829, |
| "grad_norm_var": 0.024074926323939393, |
| "learning_rate": 0.1, |
| "loss": 3.0279, |
| "loss/crossentropy": 2.8036413192749023, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2242809385061264, |
| "loss/reg": 0.15435901284217834, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.00209, |
| "grad_norm": 1.5774569511413574, |
| "grad_norm_var": 0.023108171331224447, |
| "learning_rate": 0.1, |
| "loss": 2.8636, |
| "loss/crossentropy": 2.6733367443084717, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19021844863891602, |
| "loss/reg": 0.15440241992473602, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.0021, |
| "grad_norm": 1.679646372795105, |
| "grad_norm_var": 0.023416289555610585, |
| "learning_rate": 0.1, |
| "loss": 2.7128, |
| "loss/crossentropy": 2.499990701675415, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2127702236175537, |
| "loss/reg": 0.15446113049983978, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.00211, |
| "grad_norm": 3.3492116928100586, |
| "grad_norm_var": 0.18402207742987098, |
| "learning_rate": 0.1, |
| "loss": 3.0624, |
| "loss/crossentropy": 2.7580738067626953, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.3043629825115204, |
| "loss/reg": 0.1542549580335617, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.00212, |
| "grad_norm": 1.9247390031814575, |
| "grad_norm_var": 0.18262609283135584, |
| "learning_rate": 0.1, |
| "loss": 2.7329, |
| "loss/crossentropy": 2.5118932723999023, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22099050879478455, |
| "loss/reg": 0.15429657697677612, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.00213, |
| "grad_norm": 1.717865228652954, |
| "grad_norm_var": 0.18178902594024743, |
| "learning_rate": 0.1, |
| "loss": 2.564, |
| "loss/crossentropy": 2.341069221496582, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22290551662445068, |
| "loss/reg": 0.1541624665260315, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.00214, |
| "grad_norm": 1.7612051963806152, |
| "grad_norm_var": 0.1811764601449136, |
| "learning_rate": 0.1, |
| "loss": 2.7925, |
| "loss/crossentropy": 2.5686392784118652, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22386540472507477, |
| "loss/reg": 0.15426336228847504, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.00215, |
| "grad_norm": 1.8590441942214966, |
| "grad_norm_var": 0.1742883061075207, |
| "learning_rate": 0.1, |
| "loss": 2.7033, |
| "loss/crossentropy": 2.4690449237823486, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23428821563720703, |
| "loss/reg": 0.15413622558116913, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.00216, |
| "grad_norm": 1.7261474132537842, |
| "grad_norm_var": 0.16936878514304846, |
| "learning_rate": 0.1, |
| "loss": 2.6144, |
| "loss/crossentropy": 2.398364305496216, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21607044339179993, |
| "loss/reg": 0.1542275995016098, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.00217, |
| "grad_norm": 1.9054256677627563, |
| "grad_norm_var": 0.1692605318141562, |
| "learning_rate": 0.1, |
| "loss": 2.7125, |
| "loss/crossentropy": 2.493739366531372, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21875670552253723, |
| "loss/reg": 0.1541271209716797, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.00218, |
| "grad_norm": 1.7580618858337402, |
| "grad_norm_var": 0.16862796958186402, |
| "learning_rate": 0.1, |
| "loss": 2.6474, |
| "loss/crossentropy": 2.44039249420166, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20704764127731323, |
| "loss/reg": 0.1541358381509781, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.00219, |
| "grad_norm": 1.8633517026901245, |
| "grad_norm_var": 0.16686394887920716, |
| "learning_rate": 0.1, |
| "loss": 2.8634, |
| "loss/crossentropy": 2.617461681365967, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24593022465705872, |
| "loss/reg": 0.15394145250320435, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.0022, |
| "grad_norm": 1.882486343383789, |
| "grad_norm_var": 0.16627096807541614, |
| "learning_rate": 0.1, |
| "loss": 2.7262, |
| "loss/crossentropy": 2.5018985271453857, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22431042790412903, |
| "loss/reg": 0.15400530397891998, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.00221, |
| "grad_norm": 1.8483003377914429, |
| "grad_norm_var": 0.16357833237078695, |
| "learning_rate": 0.1, |
| "loss": 2.7445, |
| "loss/crossentropy": 2.5411136150360107, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2034027874469757, |
| "loss/reg": 0.15388135612010956, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.00222, |
| "grad_norm": 1.988399624824524, |
| "grad_norm_var": 0.16035191204298654, |
| "learning_rate": 0.1, |
| "loss": 3.0524, |
| "loss/crossentropy": 2.834516763687134, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21789054572582245, |
| "loss/reg": 0.15386797487735748, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.00223, |
| "grad_norm": 1.7462061643600464, |
| "grad_norm_var": 0.15991476641650948, |
| "learning_rate": 0.1, |
| "loss": 2.7644, |
| "loss/crossentropy": 2.5663912296295166, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1980125904083252, |
| "loss/reg": 0.15379007160663605, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.00224, |
| "grad_norm": 1.7728267908096313, |
| "grad_norm_var": 0.16065306229747964, |
| "learning_rate": 0.1, |
| "loss": 2.4811, |
| "loss/crossentropy": 2.2716434001922607, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20949465036392212, |
| "loss/reg": 0.15394888818264008, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.00225, |
| "grad_norm": 1.742086410522461, |
| "grad_norm_var": 0.15532134188652497, |
| "learning_rate": 0.1, |
| "loss": 2.7013, |
| "loss/crossentropy": 2.5073935985565186, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19393475353717804, |
| "loss/reg": 0.15393270552158356, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.00226, |
| "grad_norm": 1.839017629623413, |
| "grad_norm_var": 0.15206037005143855, |
| "learning_rate": 0.1, |
| "loss": 2.7106, |
| "loss/crossentropy": 2.497980833053589, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21265888214111328, |
| "loss/reg": 0.1538631170988083, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.00227, |
| "grad_norm": 1.797853708267212, |
| "grad_norm_var": 0.006390092945588084, |
| "learning_rate": 0.1, |
| "loss": 2.7354, |
| "loss/crossentropy": 2.510516405105591, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22487084567546844, |
| "loss/reg": 0.1537153571844101, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.00228, |
| "grad_norm": 1.7112071514129639, |
| "grad_norm_var": 0.006280981975396986, |
| "learning_rate": 0.1, |
| "loss": 2.74, |
| "loss/crossentropy": 2.5273284912109375, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21264943480491638, |
| "loss/reg": 0.15376277267932892, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.00229, |
| "grad_norm": 1.7603391408920288, |
| "grad_norm_var": 0.00588629758813776, |
| "learning_rate": 0.1, |
| "loss": 2.6298, |
| "loss/crossentropy": 2.4050440788269043, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22476083040237427, |
| "loss/reg": 0.15372157096862793, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.0023, |
| "grad_norm": 1.7794082164764404, |
| "grad_norm_var": 0.005788281368091693, |
| "learning_rate": 0.1, |
| "loss": 2.5987, |
| "loss/crossentropy": 2.3837478160858154, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21494728326797485, |
| "loss/reg": 0.1536727398633957, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.00231, |
| "grad_norm": 2.5233850479125977, |
| "grad_norm_var": 0.03760523194885786, |
| "learning_rate": 0.1, |
| "loss": 2.5266, |
| "loss/crossentropy": 2.3053321838378906, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22129446268081665, |
| "loss/reg": 0.15368299186229706, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.00232, |
| "grad_norm": 2.416210174560547, |
| "grad_norm_var": 0.05571550407545083, |
| "learning_rate": 0.1, |
| "loss": 2.8942, |
| "loss/crossentropy": 2.649854898452759, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24434776604175568, |
| "loss/reg": 0.15352365374565125, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.00233, |
| "grad_norm": 1.9233362674713135, |
| "grad_norm_var": 0.05575827670472127, |
| "learning_rate": 0.1, |
| "loss": 2.2167, |
| "loss/crossentropy": 2.0039470195770264, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21273092925548553, |
| "loss/reg": 0.1536063551902771, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.00234, |
| "grad_norm": 1.9016902446746826, |
| "grad_norm_var": 0.05438629824691906, |
| "learning_rate": 0.1, |
| "loss": 2.5666, |
| "loss/crossentropy": 2.333000898361206, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23359721899032593, |
| "loss/reg": 0.15358415246009827, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.00235, |
| "grad_norm": 1.9583090543746948, |
| "grad_norm_var": 0.05440980211039322, |
| "learning_rate": 0.1, |
| "loss": 2.9504, |
| "loss/crossentropy": 2.7317676544189453, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21865971386432648, |
| "loss/reg": 0.1535932719707489, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.00236, |
| "grad_norm": 1.7869987487792969, |
| "grad_norm_var": 0.05535468191591401, |
| "learning_rate": 0.1, |
| "loss": 2.8751, |
| "loss/crossentropy": 2.6557424068450928, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21932704746723175, |
| "loss/reg": 0.15342706441879272, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.00237, |
| "grad_norm": 1.7052128314971924, |
| "grad_norm_var": 0.05773461539600403, |
| "learning_rate": 0.1, |
| "loss": 2.7931, |
| "loss/crossentropy": 2.5545198917388916, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.238602876663208, |
| "loss/reg": 0.15358272194862366, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.00238, |
| "grad_norm": 1.6776797771453857, |
| "grad_norm_var": 0.05998342975204854, |
| "learning_rate": 0.1, |
| "loss": 2.5506, |
| "loss/crossentropy": 2.336742639541626, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2138223648071289, |
| "loss/reg": 0.15348772704601288, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.00239, |
| "grad_norm": 1.61436128616333, |
| "grad_norm_var": 0.06337986952530053, |
| "learning_rate": 0.1, |
| "loss": 2.6361, |
| "loss/crossentropy": 2.427302598953247, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20879578590393066, |
| "loss/reg": 0.15353453159332275, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.0024, |
| "grad_norm": 1.9753355979919434, |
| "grad_norm_var": 0.06333619888282532, |
| "learning_rate": 0.1, |
| "loss": 2.4992, |
| "loss/crossentropy": 2.2673532962799072, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23185107111930847, |
| "loss/reg": 0.15337026119232178, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.00241, |
| "grad_norm": 1.8152681589126587, |
| "grad_norm_var": 0.06230544273460759, |
| "learning_rate": 0.1, |
| "loss": 3.0875, |
| "loss/crossentropy": 2.862337112426758, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22516539692878723, |
| "loss/reg": 0.15337352454662323, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.00242, |
| "grad_norm": 1.68618905544281, |
| "grad_norm_var": 0.06473483793106803, |
| "learning_rate": 0.1, |
| "loss": 2.8092, |
| "loss/crossentropy": 2.592268228530884, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21695148944854736, |
| "loss/reg": 0.15340402722358704, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.00243, |
| "grad_norm": 1.7959580421447754, |
| "grad_norm_var": 0.06475507957945652, |
| "learning_rate": 0.1, |
| "loss": 2.8199, |
| "loss/crossentropy": 2.61146879196167, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20842549204826355, |
| "loss/reg": 0.15336458384990692, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.00244, |
| "grad_norm": 1.793521523475647, |
| "grad_norm_var": 0.06335970240736793, |
| "learning_rate": 0.1, |
| "loss": 2.9026, |
| "loss/crossentropy": 2.682964563369751, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2196551263332367, |
| "loss/reg": 0.1532832235097885, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.00245, |
| "grad_norm": 2.395270824432373, |
| "grad_norm_var": 0.07824996528890983, |
| "learning_rate": 0.1, |
| "loss": 3.3004, |
| "loss/crossentropy": 3.0669214725494385, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23348459601402283, |
| "loss/reg": 0.15327098965644836, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.00246, |
| "grad_norm": 1.7178128957748413, |
| "grad_norm_var": 0.0796561701855523, |
| "learning_rate": 0.1, |
| "loss": 2.5849, |
| "loss/crossentropy": 2.3764595985412598, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20847919583320618, |
| "loss/reg": 0.15327328443527222, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.00247, |
| "grad_norm": 1.6658800840377808, |
| "grad_norm_var": 0.0563868153951051, |
| "learning_rate": 0.1, |
| "loss": 2.7426, |
| "loss/crossentropy": 2.5322439670562744, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2103504091501236, |
| "loss/reg": 0.15321698784828186, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.00248, |
| "grad_norm": 1.9437313079833984, |
| "grad_norm_var": 0.03557122059050712, |
| "learning_rate": 0.1, |
| "loss": 2.5684, |
| "loss/crossentropy": 2.34796404838562, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22038845717906952, |
| "loss/reg": 0.15317879617214203, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.00249, |
| "grad_norm": 2.0784988403320312, |
| "grad_norm_var": 0.03890791914994622, |
| "learning_rate": 0.1, |
| "loss": 2.8919, |
| "loss/crossentropy": 2.6263644695281982, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.26550018787384033, |
| "loss/reg": 0.15328219532966614, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.0025, |
| "grad_norm": 1.5875276327133179, |
| "grad_norm_var": 0.042680210869009194, |
| "learning_rate": 0.1, |
| "loss": 2.9377, |
| "loss/crossentropy": 2.738030433654785, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19970470666885376, |
| "loss/reg": 0.15308958292007446, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.00251, |
| "grad_norm": 1.7811739444732666, |
| "grad_norm_var": 0.041489160464979594, |
| "learning_rate": 0.1, |
| "loss": 2.4077, |
| "loss/crossentropy": 2.192779302597046, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21496158838272095, |
| "loss/reg": 0.153029665350914, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.00252, |
| "grad_norm": 2.110591411590576, |
| "grad_norm_var": 0.04687833846929645, |
| "learning_rate": 0.1, |
| "loss": 2.5675, |
| "loss/crossentropy": 2.344547748565674, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22292152047157288, |
| "loss/reg": 0.153157576918602, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.00253, |
| "grad_norm": 1.6467821598052979, |
| "grad_norm_var": 0.04809507830047816, |
| "learning_rate": 0.1, |
| "loss": 2.5356, |
| "loss/crossentropy": 2.328913450241089, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2067192941904068, |
| "loss/reg": 0.1530914008617401, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.00254, |
| "grad_norm": 1.5803449153900146, |
| "grad_norm_var": 0.05066854518537222, |
| "learning_rate": 0.1, |
| "loss": 2.6529, |
| "loss/crossentropy": 2.4646201133728027, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18831059336662292, |
| "loss/reg": 0.15316183865070343, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.00255, |
| "grad_norm": 1.705944299697876, |
| "grad_norm_var": 0.04862960622452282, |
| "learning_rate": 0.1, |
| "loss": 2.4527, |
| "loss/crossentropy": 2.234102964401245, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21864545345306396, |
| "loss/reg": 0.15300080180168152, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.00256, |
| "grad_norm": 1.8911834955215454, |
| "grad_norm_var": 0.04744137986644142, |
| "learning_rate": 0.1, |
| "loss": 2.8044, |
| "loss/crossentropy": 2.5729379653930664, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23144884407520294, |
| "loss/reg": 0.1528940498828888, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.00257, |
| "grad_norm": 1.7506827116012573, |
| "grad_norm_var": 0.04778356374063713, |
| "learning_rate": 0.1, |
| "loss": 2.9451, |
| "loss/crossentropy": 2.730300188064575, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21483469009399414, |
| "loss/reg": 0.1530746966600418, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.00258, |
| "grad_norm": 1.6771105527877808, |
| "grad_norm_var": 0.04795152791952398, |
| "learning_rate": 0.1, |
| "loss": 2.53, |
| "loss/crossentropy": 2.324577808380127, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.205410897731781, |
| "loss/reg": 0.1529548466205597, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.00259, |
| "grad_norm": 1.7804155349731445, |
| "grad_norm_var": 0.048016709926949314, |
| "learning_rate": 0.1, |
| "loss": 2.9017, |
| "loss/crossentropy": 2.69140625, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21026596426963806, |
| "loss/reg": 0.15299636125564575, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.0026, |
| "grad_norm": 1.8070272207260132, |
| "grad_norm_var": 0.0479819513235317, |
| "learning_rate": 0.1, |
| "loss": 2.814, |
| "loss/crossentropy": 2.591458320617676, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22255855798721313, |
| "loss/reg": 0.152926504611969, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.00261, |
| "grad_norm": 4.898253917694092, |
| "grad_norm_var": 0.6315259395468549, |
| "learning_rate": 0.1, |
| "loss": 2.9361, |
| "loss/crossentropy": 2.5626220703125, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.3734857141971588, |
| "loss/reg": 0.15296080708503723, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.00262, |
| "grad_norm": 1.7901341915130615, |
| "grad_norm_var": 0.6293589856783472, |
| "learning_rate": 0.1, |
| "loss": 2.5932, |
| "loss/crossentropy": 2.3793349266052246, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21390476822853088, |
| "loss/reg": 0.15291240811347961, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.00263, |
| "grad_norm": 1.9146630764007568, |
| "grad_norm_var": 0.6227759214331409, |
| "learning_rate": 0.1, |
| "loss": 2.6254, |
| "loss/crossentropy": 2.3945417404174805, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23082862794399261, |
| "loss/reg": 0.15279501676559448, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.00264, |
| "grad_norm": 1.8639161586761475, |
| "grad_norm_var": 0.623735683907778, |
| "learning_rate": 0.1, |
| "loss": 2.611, |
| "loss/crossentropy": 2.360976457595825, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2500403821468353, |
| "loss/reg": 0.15281575918197632, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.00265, |
| "grad_norm": 1.8003004789352417, |
| "grad_norm_var": 0.6253463511770649, |
| "learning_rate": 0.1, |
| "loss": 2.7427, |
| "loss/crossentropy": 2.5121912956237793, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2304896116256714, |
| "loss/reg": 0.15287426114082336, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.00266, |
| "grad_norm": 1.8270395994186401, |
| "grad_norm_var": 0.6165856624346037, |
| "learning_rate": 0.1, |
| "loss": 2.7491, |
| "loss/crossentropy": 2.524933338165283, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22414851188659668, |
| "loss/reg": 0.15279553830623627, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.00267, |
| "grad_norm": 1.7439817190170288, |
| "grad_norm_var": 0.6177032027246103, |
| "learning_rate": 0.1, |
| "loss": 2.7722, |
| "loss/crossentropy": 2.540666341781616, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23151114583015442, |
| "loss/reg": 0.15278998017311096, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.00268, |
| "grad_norm": 1.6291568279266357, |
| "grad_norm_var": 0.6242413581296313, |
| "learning_rate": 0.1, |
| "loss": 2.4248, |
| "loss/crossentropy": 2.2316129207611084, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19319671392440796, |
| "loss/reg": 0.15296587347984314, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.00269, |
| "grad_norm": 1.7584705352783203, |
| "grad_norm_var": 0.6204060170226725, |
| "learning_rate": 0.1, |
| "loss": 2.8382, |
| "loss/crossentropy": 2.617718458175659, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2204788625240326, |
| "loss/reg": 0.1526516228914261, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.0027, |
| "grad_norm": 1.839720606803894, |
| "grad_norm_var": 0.6113542616693726, |
| "learning_rate": 0.1, |
| "loss": 2.7007, |
| "loss/crossentropy": 2.482386827468872, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2183404266834259, |
| "loss/reg": 0.1527164876461029, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.00271, |
| "grad_norm": 1.805973768234253, |
| "grad_norm_var": 0.6083261436587122, |
| "learning_rate": 0.1, |
| "loss": 2.5827, |
| "loss/crossentropy": 2.363694906234741, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21900711953639984, |
| "loss/reg": 0.15270750224590302, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.00272, |
| "grad_norm": 1.774283766746521, |
| "grad_norm_var": 0.6106600880804003, |
| "learning_rate": 0.1, |
| "loss": 2.4794, |
| "loss/crossentropy": 2.2628819942474365, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2164846807718277, |
| "loss/reg": 0.15260453522205353, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.00273, |
| "grad_norm": 1.871080756187439, |
| "grad_norm_var": 0.6079037534934929, |
| "learning_rate": 0.1, |
| "loss": 2.9741, |
| "loss/crossentropy": 2.7448911666870117, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22917905449867249, |
| "loss/reg": 0.15267729759216309, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.00274, |
| "grad_norm": 1.7166439294815063, |
| "grad_norm_var": 0.6063714201972024, |
| "learning_rate": 0.1, |
| "loss": 3.0612, |
| "loss/crossentropy": 2.8549892902374268, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2062022089958191, |
| "loss/reg": 0.15258991718292236, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.00275, |
| "grad_norm": 1.856535792350769, |
| "grad_norm_var": 0.6046184267462507, |
| "learning_rate": 0.1, |
| "loss": 2.7351, |
| "loss/crossentropy": 2.5279064178466797, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2072233110666275, |
| "loss/reg": 0.1526574045419693, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.00276, |
| "grad_norm": 1.8589593172073364, |
| "grad_norm_var": 0.6034952843323494, |
| "learning_rate": 0.1, |
| "loss": 2.5052, |
| "loss/crossentropy": 2.2762982845306396, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22893573343753815, |
| "loss/reg": 0.15257768332958221, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.00277, |
| "grad_norm": 1.8286291360855103, |
| "grad_norm_var": 0.004898950102079643, |
| "learning_rate": 0.1, |
| "loss": 2.7997, |
| "loss/crossentropy": 2.5641186237335205, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2356192171573639, |
| "loss/reg": 0.1525966227054596, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.00278, |
| "grad_norm": 1.8533680438995361, |
| "grad_norm_var": 0.005023790218524245, |
| "learning_rate": 0.1, |
| "loss": 3.0137, |
| "loss/crossentropy": 2.7833714485168457, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2302965670824051, |
| "loss/reg": 0.15249623358249664, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.00279, |
| "grad_norm": 1.8012237548828125, |
| "grad_norm_var": 0.004228683805999367, |
| "learning_rate": 0.1, |
| "loss": 2.7935, |
| "loss/crossentropy": 2.5757014751434326, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21781861782073975, |
| "loss/reg": 0.1525435596704483, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.0028, |
| "grad_norm": 2.372845411300659, |
| "grad_norm_var": 0.024629722519929943, |
| "learning_rate": 0.1, |
| "loss": 2.7282, |
| "loss/crossentropy": 2.461944818496704, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.26621773838996887, |
| "loss/reg": 0.1525908261537552, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.00281, |
| "grad_norm": 2.0157835483551025, |
| "grad_norm_var": 0.026573949471247655, |
| "learning_rate": 0.1, |
| "loss": 2.743, |
| "loss/crossentropy": 2.5056850910186768, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2372947633266449, |
| "loss/reg": 0.15249086916446686, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.00282, |
| "grad_norm": 1.7902612686157227, |
| "grad_norm_var": 0.026756891142050186, |
| "learning_rate": 0.1, |
| "loss": 2.5835, |
| "loss/crossentropy": 2.3747575283050537, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20872806012630463, |
| "loss/reg": 0.15239422023296356, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.00283, |
| "grad_norm": 1.6276655197143555, |
| "grad_norm_var": 0.02916617007040679, |
| "learning_rate": 0.1, |
| "loss": 2.5265, |
| "loss/crossentropy": 2.3247153759002686, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20179952681064606, |
| "loss/reg": 0.15247441828250885, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.00284, |
| "grad_norm": 1.9575281143188477, |
| "grad_norm_var": 0.02678189875540789, |
| "learning_rate": 0.1, |
| "loss": 2.7146, |
| "loss/crossentropy": 2.5061917304992676, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20839399099349976, |
| "loss/reg": 0.15234006941318512, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.00285, |
| "grad_norm": 1.7467279434204102, |
| "grad_norm_var": 0.02694644320787152, |
| "learning_rate": 0.1, |
| "loss": 2.605, |
| "loss/crossentropy": 2.390573024749756, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.214436873793602, |
| "loss/reg": 0.15244217216968536, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.00286, |
| "grad_norm": 1.7694292068481445, |
| "grad_norm_var": 0.02742025789345348, |
| "learning_rate": 0.1, |
| "loss": 2.758, |
| "loss/crossentropy": 2.5358898639678955, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2220873236656189, |
| "loss/reg": 0.15232709050178528, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.00287, |
| "grad_norm": 1.9870858192443848, |
| "grad_norm_var": 0.028336354940760346, |
| "learning_rate": 0.1, |
| "loss": 3.0446, |
| "loss/crossentropy": 2.799497365951538, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24507908523082733, |
| "loss/reg": 0.1524602174758911, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.00288, |
| "grad_norm": 2.011697292327881, |
| "grad_norm_var": 0.029011183856840657, |
| "learning_rate": 0.1, |
| "loss": 2.9156, |
| "loss/crossentropy": 2.7041590213775635, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21143893897533417, |
| "loss/reg": 0.15246528387069702, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.00289, |
| "grad_norm": 1.9326939582824707, |
| "grad_norm_var": 0.029182636074934484, |
| "learning_rate": 0.1, |
| "loss": 2.7526, |
| "loss/crossentropy": 2.514970302581787, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23767763376235962, |
| "loss/reg": 0.15244287252426147, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.0029, |
| "grad_norm": 1.8608566522598267, |
| "grad_norm_var": 0.02728482096249157, |
| "learning_rate": 0.1, |
| "loss": 2.672, |
| "loss/crossentropy": 2.468048095703125, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20392370223999023, |
| "loss/reg": 0.15232525765895844, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.00291, |
| "grad_norm": 1.717362403869629, |
| "grad_norm_var": 0.029152665287153864, |
| "learning_rate": 0.1, |
| "loss": 2.7496, |
| "loss/crossentropy": 2.5444157123565674, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20515665411949158, |
| "loss/reg": 0.15229053795337677, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.00292, |
| "grad_norm": 1.7725898027420044, |
| "grad_norm_var": 0.029898710523408964, |
| "learning_rate": 0.1, |
| "loss": 2.5431, |
| "loss/crossentropy": 2.3313050270080566, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21183520555496216, |
| "loss/reg": 0.15226081013679504, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.00293, |
| "grad_norm": 2.084184408187866, |
| "grad_norm_var": 0.03230302316822697, |
| "learning_rate": 0.1, |
| "loss": 2.3555, |
| "loss/crossentropy": 2.161330461502075, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1941923201084137, |
| "loss/reg": 0.1523096263408661, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.00294, |
| "grad_norm": 1.8714282512664795, |
| "grad_norm_var": 0.03222597186848641, |
| "learning_rate": 0.1, |
| "loss": 2.5887, |
| "loss/crossentropy": 2.37809157371521, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2106132209300995, |
| "loss/reg": 0.15226005017757416, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.00295, |
| "grad_norm": 1.9041036367416382, |
| "grad_norm_var": 0.031601676029156485, |
| "learning_rate": 0.1, |
| "loss": 2.7424, |
| "loss/crossentropy": 2.5080060958862305, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.234354168176651, |
| "loss/reg": 0.152297243475914, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.00296, |
| "grad_norm": 1.829167366027832, |
| "grad_norm_var": 0.0158998100608646, |
| "learning_rate": 0.1, |
| "loss": 2.5338, |
| "loss/crossentropy": 2.308014154434204, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2257852554321289, |
| "loss/reg": 0.15232549607753754, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.00297, |
| "grad_norm": 1.8701921701431274, |
| "grad_norm_var": 0.014344364862462792, |
| "learning_rate": 0.1, |
| "loss": 2.7924, |
| "loss/crossentropy": 2.5564727783203125, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23597592115402222, |
| "loss/reg": 0.15217500925064087, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.00298, |
| "grad_norm": 1.8343863487243652, |
| "grad_norm_var": 0.01406569460485585, |
| "learning_rate": 0.1, |
| "loss": 2.6232, |
| "loss/crossentropy": 2.4018571376800537, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.221307173371315, |
| "loss/reg": 0.15221168100833893, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.00299, |
| "grad_norm": 1.9616925716400146, |
| "grad_norm_var": 0.010644011423808782, |
| "learning_rate": 0.1, |
| "loss": 2.7246, |
| "loss/crossentropy": 2.492993116378784, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2316386103630066, |
| "loss/reg": 0.15220828354358673, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.003, |
| "grad_norm": 1.6590018272399902, |
| "grad_norm_var": 0.013205424856254429, |
| "learning_rate": 0.1, |
| "loss": 2.7372, |
| "loss/crossentropy": 2.532999277114868, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20422445237636566, |
| "loss/reg": 0.15202808380126953, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.00301, |
| "grad_norm": 1.6716253757476807, |
| "grad_norm_var": 0.014725138970379466, |
| "learning_rate": 0.1, |
| "loss": 2.4122, |
| "loss/crossentropy": 2.2040345668792725, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20820075273513794, |
| "loss/reg": 0.15210776031017303, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.00302, |
| "grad_norm": 1.7748252153396606, |
| "grad_norm_var": 0.014662807890303439, |
| "learning_rate": 0.1, |
| "loss": 2.7584, |
| "loss/crossentropy": 2.5395233631134033, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21884238719940186, |
| "loss/reg": 0.1521168053150177, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.00303, |
| "grad_norm": 1.7543840408325195, |
| "grad_norm_var": 0.014070937374316786, |
| "learning_rate": 0.1, |
| "loss": 2.5599, |
| "loss/crossentropy": 2.3246779441833496, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2351926565170288, |
| "loss/reg": 0.15205931663513184, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.00304, |
| "grad_norm": 1.7532981634140015, |
| "grad_norm_var": 0.01247968993753048, |
| "learning_rate": 0.1, |
| "loss": 2.5013, |
| "loss/crossentropy": 2.276309013366699, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22498828172683716, |
| "loss/reg": 0.1519249975681305, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.00305, |
| "grad_norm": 1.7168755531311035, |
| "grad_norm_var": 0.012384958064664175, |
| "learning_rate": 0.1, |
| "loss": 2.8631, |
| "loss/crossentropy": 2.6478779315948486, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2151927351951599, |
| "loss/reg": 0.15202872455120087, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.00306, |
| "grad_norm": 2.2369191646575928, |
| "grad_norm_var": 0.023535843003298317, |
| "learning_rate": 0.1, |
| "loss": 2.8013, |
| "loss/crossentropy": 2.553875684738159, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24745815992355347, |
| "loss/reg": 0.15191484987735748, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.00307, |
| "grad_norm": 1.7659687995910645, |
| "grad_norm_var": 0.02290003494517041, |
| "learning_rate": 0.1, |
| "loss": 2.7114, |
| "loss/crossentropy": 2.4934709072113037, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21790578961372375, |
| "loss/reg": 0.15202990174293518, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.00308, |
| "grad_norm": 1.9510020017623901, |
| "grad_norm_var": 0.023255202549692853, |
| "learning_rate": 0.1, |
| "loss": 2.8284, |
| "loss/crossentropy": 2.5950257778167725, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23339563608169556, |
| "loss/reg": 0.15196770429611206, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.00309, |
| "grad_norm": 1.6753053665161133, |
| "grad_norm_var": 0.021070075182135817, |
| "learning_rate": 0.1, |
| "loss": 2.568, |
| "loss/crossentropy": 2.374311923980713, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19368112087249756, |
| "loss/reg": 0.1519303172826767, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.0031, |
| "grad_norm": 1.7081917524337769, |
| "grad_norm_var": 0.0217660034883977, |
| "learning_rate": 0.1, |
| "loss": 2.783, |
| "loss/crossentropy": 2.5761661529541016, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20679108798503876, |
| "loss/reg": 0.1519574671983719, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.00311, |
| "grad_norm": 1.722778558731079, |
| "grad_norm_var": 0.021707404135818762, |
| "learning_rate": 0.1, |
| "loss": 2.8797, |
| "loss/crossentropy": 2.6765241622924805, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20322000980377197, |
| "loss/reg": 0.1520189791917801, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.00312, |
| "grad_norm": 1.7546467781066895, |
| "grad_norm_var": 0.021817844161866103, |
| "learning_rate": 0.1, |
| "loss": 2.7514, |
| "loss/crossentropy": 2.522287130355835, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2291133850812912, |
| "loss/reg": 0.1520421952009201, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.00313, |
| "grad_norm": 1.7675800323486328, |
| "grad_norm_var": 0.02152506607900264, |
| "learning_rate": 0.1, |
| "loss": 2.6192, |
| "loss/crossentropy": 2.392517328262329, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22670842707157135, |
| "loss/reg": 0.1520325392484665, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.00314, |
| "grad_norm": 1.913899540901184, |
| "grad_norm_var": 0.022345409626765365, |
| "learning_rate": 0.1, |
| "loss": 3.0393, |
| "loss/crossentropy": 2.796639919281006, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24264192581176758, |
| "loss/reg": 0.15205076336860657, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.00315, |
| "grad_norm": 1.7384769916534424, |
| "grad_norm_var": 0.020624846164646195, |
| "learning_rate": 0.1, |
| "loss": 2.8778, |
| "loss/crossentropy": 2.6688342094421387, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2090141326189041, |
| "loss/reg": 0.15191131830215454, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.00316, |
| "grad_norm": 2.6338746547698975, |
| "grad_norm_var": 0.06360695890862847, |
| "learning_rate": 0.1, |
| "loss": 2.3198, |
| "loss/crossentropy": 2.0772805213928223, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24255335330963135, |
| "loss/reg": 0.1519191414117813, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.00317, |
| "grad_norm": 1.852968454360962, |
| "grad_norm_var": 0.06144055456700901, |
| "learning_rate": 0.1, |
| "loss": 2.6501, |
| "loss/crossentropy": 2.433250665664673, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21681655943393707, |
| "loss/reg": 0.15181362628936768, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.00318, |
| "grad_norm": 1.6826399564743042, |
| "grad_norm_var": 0.06298863780118603, |
| "learning_rate": 0.1, |
| "loss": 2.7515, |
| "loss/crossentropy": 2.5635387897491455, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18794381618499756, |
| "loss/reg": 0.15178216993808746, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.00319, |
| "grad_norm": 1.85292387008667, |
| "grad_norm_var": 0.0623155972699213, |
| "learning_rate": 0.1, |
| "loss": 2.4755, |
| "loss/crossentropy": 2.2641425132751465, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21139657497406006, |
| "loss/reg": 0.1518886387348175, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.0032, |
| "grad_norm": 1.6966912746429443, |
| "grad_norm_var": 0.06330580774900696, |
| "learning_rate": 0.1, |
| "loss": 2.4953, |
| "loss/crossentropy": 2.2871975898742676, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20806269347667694, |
| "loss/reg": 0.15178616344928741, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.00321, |
| "grad_norm": 1.8895527124404907, |
| "grad_norm_var": 0.062002591347704514, |
| "learning_rate": 0.1, |
| "loss": 2.7715, |
| "loss/crossentropy": 2.5435333251953125, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22799861431121826, |
| "loss/reg": 0.15174005925655365, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.00322, |
| "grad_norm": 1.6515710353851318, |
| "grad_norm_var": 0.05440684205244768, |
| "learning_rate": 0.1, |
| "loss": 2.7137, |
| "loss/crossentropy": 2.5104598999023438, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20323163270950317, |
| "loss/reg": 0.15175510942935944, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.00323, |
| "grad_norm": 1.6686187982559204, |
| "grad_norm_var": 0.055812491699098625, |
| "learning_rate": 0.1, |
| "loss": 2.7788, |
| "loss/crossentropy": 2.5798561573028564, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19892914593219757, |
| "loss/reg": 0.1516965925693512, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.00324, |
| "grad_norm": 1.5869677066802979, |
| "grad_norm_var": 0.05786002371726552, |
| "learning_rate": 0.1, |
| "loss": 2.5339, |
| "loss/crossentropy": 2.3427820205688477, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1911391317844391, |
| "loss/reg": 0.15177246928215027, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.00325, |
| "grad_norm": 1.7297979593276978, |
| "grad_norm_var": 0.057141126929102865, |
| "learning_rate": 0.1, |
| "loss": 2.7126, |
| "loss/crossentropy": 2.4887728691101074, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2238450050354004, |
| "loss/reg": 0.15161532163619995, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.00326, |
| "grad_norm": 1.8590242862701416, |
| "grad_norm_var": 0.05665234400511133, |
| "learning_rate": 0.1, |
| "loss": 2.7914, |
| "loss/crossentropy": 2.570279359817505, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2211388647556305, |
| "loss/reg": 0.1517268717288971, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.00327, |
| "grad_norm": 2.265671491622925, |
| "grad_norm_var": 0.06856948325591923, |
| "learning_rate": 0.1, |
| "loss": 2.7235, |
| "loss/crossentropy": 2.455695390701294, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.26784729957580566, |
| "loss/reg": 0.15171390771865845, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.00328, |
| "grad_norm": 1.7335538864135742, |
| "grad_norm_var": 0.06885577598178678, |
| "learning_rate": 0.1, |
| "loss": 2.8843, |
| "loss/crossentropy": 2.6798086166381836, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20445087552070618, |
| "loss/reg": 0.15172886848449707, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.00329, |
| "grad_norm": 1.7656223773956299, |
| "grad_norm_var": 0.06887628591748148, |
| "learning_rate": 0.1, |
| "loss": 2.8846, |
| "loss/crossentropy": 2.6698789596557617, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2146957516670227, |
| "loss/reg": 0.15165625512599945, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.0033, |
| "grad_norm": 1.671199083328247, |
| "grad_norm_var": 0.07033191381487024, |
| "learning_rate": 0.1, |
| "loss": 2.5404, |
| "loss/crossentropy": 2.338350296020508, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20202696323394775, |
| "loss/reg": 0.1517135202884674, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.00331, |
| "grad_norm": 1.8885756731033325, |
| "grad_norm_var": 0.06990940783870526, |
| "learning_rate": 0.1, |
| "loss": 2.68, |
| "loss/crossentropy": 2.452841281890869, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22712501883506775, |
| "loss/reg": 0.15173739194869995, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.00332, |
| "grad_norm": 1.9748201370239258, |
| "grad_norm_var": 0.02723654844425495, |
| "learning_rate": 0.1, |
| "loss": 2.7801, |
| "loss/crossentropy": 2.571702241897583, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20838361978530884, |
| "loss/reg": 0.15167583525180817, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.00333, |
| "grad_norm": 1.873297095298767, |
| "grad_norm_var": 0.02741099552111829, |
| "learning_rate": 0.1, |
| "loss": 2.6248, |
| "loss/crossentropy": 2.401357650756836, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22346417605876923, |
| "loss/reg": 0.1517009288072586, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.00334, |
| "grad_norm": 1.8609498739242554, |
| "grad_norm_var": 0.026622028135649126, |
| "learning_rate": 0.1, |
| "loss": 2.6895, |
| "loss/crossentropy": 2.4700403213500977, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2194143533706665, |
| "loss/reg": 0.15159879624843597, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.00335, |
| "grad_norm": 2.0998973846435547, |
| "grad_norm_var": 0.03182955940311369, |
| "learning_rate": 0.1, |
| "loss": 2.5485, |
| "loss/crossentropy": 2.32611346244812, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22242875397205353, |
| "loss/reg": 0.15164640545845032, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.00336, |
| "grad_norm": 2.1356918811798096, |
| "grad_norm_var": 0.03630646625152565, |
| "learning_rate": 0.1, |
| "loss": 2.4518, |
| "loss/crossentropy": 2.2482879161834717, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20349064469337463, |
| "loss/reg": 0.1515902280807495, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.00337, |
| "grad_norm": 2.1505563259124756, |
| "grad_norm_var": 0.04182138368415966, |
| "learning_rate": 0.1, |
| "loss": 2.7676, |
| "loss/crossentropy": 2.526437520980835, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24113261699676514, |
| "loss/reg": 0.15154071152210236, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.00338, |
| "grad_norm": 1.7091618776321411, |
| "grad_norm_var": 0.04035341849712684, |
| "learning_rate": 0.1, |
| "loss": 2.796, |
| "loss/crossentropy": 2.591268301010132, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2047044336795807, |
| "loss/reg": 0.1516602486371994, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.00339, |
| "grad_norm": 1.6342699527740479, |
| "grad_norm_var": 0.04136474050778383, |
| "learning_rate": 0.1, |
| "loss": 2.6985, |
| "loss/crossentropy": 2.4958066940307617, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20268017053604126, |
| "loss/reg": 0.15158210694789886, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.0034, |
| "grad_norm": 1.7044177055358887, |
| "grad_norm_var": 0.03777595919577544, |
| "learning_rate": 0.1, |
| "loss": 2.847, |
| "loss/crossentropy": 2.633756637573242, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2132689654827118, |
| "loss/reg": 0.15149760246276855, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.00341, |
| "grad_norm": 1.6277996301651, |
| "grad_norm_var": 0.040448933453944784, |
| "learning_rate": 0.1, |
| "loss": 2.6388, |
| "loss/crossentropy": 2.4233527183532715, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2154182493686676, |
| "loss/reg": 0.15167653560638428, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.00342, |
| "grad_norm": 1.963236927986145, |
| "grad_norm_var": 0.040945224215512846, |
| "learning_rate": 0.1, |
| "loss": 2.6748, |
| "loss/crossentropy": 2.4660699367523193, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20870399475097656, |
| "loss/reg": 0.15154457092285156, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.00343, |
| "grad_norm": 2.034842014312744, |
| "grad_norm_var": 0.03236452026117244, |
| "learning_rate": 0.1, |
| "loss": 2.9002, |
| "loss/crossentropy": 2.6857335567474365, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21445584297180176, |
| "loss/reg": 0.15163888037204742, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.00344, |
| "grad_norm": 1.7031670808792114, |
| "grad_norm_var": 0.03295172772607717, |
| "learning_rate": 0.1, |
| "loss": 2.8464, |
| "loss/crossentropy": 2.6435418128967285, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2028711438179016, |
| "loss/reg": 0.15156038105487823, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.00345, |
| "grad_norm": 1.6758556365966797, |
| "grad_norm_var": 0.03461300903464863, |
| "learning_rate": 0.1, |
| "loss": 3.0064, |
| "loss/crossentropy": 2.8023266792297363, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20409126579761505, |
| "loss/reg": 0.1514110416173935, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.00346, |
| "grad_norm": 1.6397191286087036, |
| "grad_norm_var": 0.03545369502609953, |
| "learning_rate": 0.1, |
| "loss": 2.7351, |
| "loss/crossentropy": 2.5252699851989746, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2097804695367813, |
| "loss/reg": 0.15157240629196167, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.00347, |
| "grad_norm": 1.9063996076583862, |
| "grad_norm_var": 0.035553899990960265, |
| "learning_rate": 0.1, |
| "loss": 2.5138, |
| "loss/crossentropy": 2.278761148452759, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23501995205879211, |
| "loss/reg": 0.151409313082695, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.00348, |
| "grad_norm": 1.7547756433486938, |
| "grad_norm_var": 0.035090511537960306, |
| "learning_rate": 0.1, |
| "loss": 2.8161, |
| "loss/crossentropy": 2.5985119342803955, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2175731062889099, |
| "loss/reg": 0.15141859650611877, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.00349, |
| "grad_norm": 1.921971321105957, |
| "grad_norm_var": 0.035440873666032977, |
| "learning_rate": 0.1, |
| "loss": 3.2051, |
| "loss/crossentropy": 2.9968109130859375, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2082539051771164, |
| "loss/reg": 0.15133626759052277, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.0035, |
| "grad_norm": 1.769417405128479, |
| "grad_norm_var": 0.035771921835031625, |
| "learning_rate": 0.1, |
| "loss": 2.4782, |
| "loss/crossentropy": 2.2439804077148438, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23421438038349152, |
| "loss/reg": 0.15139971673488617, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.00351, |
| "grad_norm": 1.8546223640441895, |
| "grad_norm_var": 0.031014371363842958, |
| "learning_rate": 0.1, |
| "loss": 2.5961, |
| "loss/crossentropy": 2.38114857673645, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21499796211719513, |
| "loss/reg": 0.15137694776058197, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.00352, |
| "grad_norm": 1.7459526062011719, |
| "grad_norm_var": 0.024316958392341565, |
| "learning_rate": 0.1, |
| "loss": 2.8022, |
| "loss/crossentropy": 2.5873653888702393, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2148575484752655, |
| "loss/reg": 0.1513848453760147, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.00353, |
| "grad_norm": 1.7052503824234009, |
| "grad_norm_var": 0.015882339064703346, |
| "learning_rate": 0.1, |
| "loss": 2.7235, |
| "loss/crossentropy": 2.517469644546509, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20604610443115234, |
| "loss/reg": 0.1513541340827942, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.00354, |
| "grad_norm": 1.8218839168548584, |
| "grad_norm_var": 0.015733119556617936, |
| "learning_rate": 0.1, |
| "loss": 2.417, |
| "loss/crossentropy": 2.201317548751831, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21567374467849731, |
| "loss/reg": 0.15136994421482086, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.00355, |
| "grad_norm": 1.7261645793914795, |
| "grad_norm_var": 0.01448790725424729, |
| "learning_rate": 0.1, |
| "loss": 2.8281, |
| "loss/crossentropy": 2.5972352027893066, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23090910911560059, |
| "loss/reg": 0.15145841240882874, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.00356, |
| "grad_norm": 1.6171144247055054, |
| "grad_norm_var": 0.01589899546990091, |
| "learning_rate": 0.1, |
| "loss": 2.8327, |
| "loss/crossentropy": 2.6116435527801514, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22101661562919617, |
| "loss/reg": 0.15129587054252625, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.00357, |
| "grad_norm": 1.7615368366241455, |
| "grad_norm_var": 0.014316048974474664, |
| "learning_rate": 0.1, |
| "loss": 2.8508, |
| "loss/crossentropy": 2.650190591812134, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2005804479122162, |
| "loss/reg": 0.15146516263484955, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.00358, |
| "grad_norm": 1.7060468196868896, |
| "grad_norm_var": 0.0124279410334679, |
| "learning_rate": 0.1, |
| "loss": 2.8565, |
| "loss/crossentropy": 2.629793643951416, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22666966915130615, |
| "loss/reg": 0.15129975974559784, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.00359, |
| "grad_norm": 2.1723055839538574, |
| "grad_norm_var": 0.01843478885598125, |
| "learning_rate": 0.1, |
| "loss": 2.6974, |
| "loss/crossentropy": 2.4849488735198975, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2124479115009308, |
| "loss/reg": 0.1513231247663498, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.0036, |
| "grad_norm": 1.6971791982650757, |
| "grad_norm_var": 0.018498480923589432, |
| "learning_rate": 0.1, |
| "loss": 2.7715, |
| "loss/crossentropy": 2.5426037311553955, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22888851165771484, |
| "loss/reg": 0.15143375098705292, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.00361, |
| "grad_norm": 1.8957135677337646, |
| "grad_norm_var": 0.018473617216682156, |
| "learning_rate": 0.1, |
| "loss": 2.5563, |
| "loss/crossentropy": 2.33087420463562, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22537752985954285, |
| "loss/reg": 0.15123027563095093, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.00362, |
| "grad_norm": 1.697386384010315, |
| "grad_norm_var": 0.017499019978352178, |
| "learning_rate": 0.1, |
| "loss": 2.8224, |
| "loss/crossentropy": 2.6073858737945557, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21505878865718842, |
| "loss/reg": 0.1513531357049942, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.00363, |
| "grad_norm": 1.6574411392211914, |
| "grad_norm_var": 0.017744898481745965, |
| "learning_rate": 0.1, |
| "loss": 2.7982, |
| "loss/crossentropy": 2.5767061710357666, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22147852182388306, |
| "loss/reg": 0.15141496062278748, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.00364, |
| "grad_norm": 3.577742576599121, |
| "grad_norm_var": 0.21893816503593175, |
| "learning_rate": 0.1, |
| "loss": 2.9974, |
| "loss/crossentropy": 2.7592902183532715, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23807193338871002, |
| "loss/reg": 0.15131956338882446, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.00365, |
| "grad_norm": 1.6201366186141968, |
| "grad_norm_var": 0.22356616694174605, |
| "learning_rate": 0.1, |
| "loss": 2.8135, |
| "loss/crossentropy": 2.613086462020874, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20037904381752014, |
| "loss/reg": 0.15114036202430725, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.00366, |
| "grad_norm": 1.6076356172561646, |
| "grad_norm_var": 0.22751442360845597, |
| "learning_rate": 0.1, |
| "loss": 2.5804, |
| "loss/crossentropy": 2.383631706237793, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.196734219789505, |
| "loss/reg": 0.15127165615558624, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.00367, |
| "grad_norm": 1.761296033859253, |
| "grad_norm_var": 0.228206673425899, |
| "learning_rate": 0.1, |
| "loss": 2.9389, |
| "loss/crossentropy": 2.717838764190674, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22105395793914795, |
| "loss/reg": 0.1513463258743286, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.00368, |
| "grad_norm": 2.0096921920776367, |
| "grad_norm_var": 0.22851986925648096, |
| "learning_rate": 0.1, |
| "loss": 2.7681, |
| "loss/crossentropy": 2.5275051593780518, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24063444137573242, |
| "loss/reg": 0.15131326019763947, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.00369, |
| "grad_norm": 2.0786919593811035, |
| "grad_norm_var": 0.2286763788537901, |
| "learning_rate": 0.1, |
| "loss": 2.37, |
| "loss/crossentropy": 2.127150297164917, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24289056658744812, |
| "loss/reg": 0.15122093260288239, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.0037, |
| "grad_norm": 1.8763484954833984, |
| "grad_norm_var": 0.22829088744450884, |
| "learning_rate": 0.1, |
| "loss": 2.6493, |
| "loss/crossentropy": 2.4364054203033447, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21287497878074646, |
| "loss/reg": 0.15139250457286835, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.00371, |
| "grad_norm": 1.7257153987884521, |
| "grad_norm_var": 0.22830154488171286, |
| "learning_rate": 0.1, |
| "loss": 2.7601, |
| "loss/crossentropy": 2.541410207748413, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21869133412837982, |
| "loss/reg": 0.15126635134220123, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.00372, |
| "grad_norm": 1.7427912950515747, |
| "grad_norm_var": 0.22448350719514465, |
| "learning_rate": 0.1, |
| "loss": 2.7006, |
| "loss/crossentropy": 2.4843952655792236, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21623246371746063, |
| "loss/reg": 0.1512490212917328, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.00373, |
| "grad_norm": 1.615275263786316, |
| "grad_norm_var": 0.22874950889401002, |
| "learning_rate": 0.1, |
| "loss": 2.82, |
| "loss/crossentropy": 2.612550973892212, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20746397972106934, |
| "loss/reg": 0.15116584300994873, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.00374, |
| "grad_norm": 1.8672704696655273, |
| "grad_norm_var": 0.22614914385669233, |
| "learning_rate": 0.1, |
| "loss": 2.8124, |
| "loss/crossentropy": 2.6045186519622803, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20791369676589966, |
| "loss/reg": 0.15127474069595337, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.00375, |
| "grad_norm": 1.7656140327453613, |
| "grad_norm_var": 0.22240730735532163, |
| "learning_rate": 0.1, |
| "loss": 2.7487, |
| "loss/crossentropy": 2.520578384399414, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22809343039989471, |
| "loss/reg": 0.1510974019765854, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.00376, |
| "grad_norm": 2.1074674129486084, |
| "grad_norm_var": 0.22253073073414853, |
| "learning_rate": 0.1, |
| "loss": 3.1286, |
| "loss/crossentropy": 2.8800649642944336, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24856722354888916, |
| "loss/reg": 0.15111801028251648, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.00377, |
| "grad_norm": 1.7181384563446045, |
| "grad_norm_var": 0.22490818728325238, |
| "learning_rate": 0.1, |
| "loss": 2.829, |
| "loss/crossentropy": 2.6191015243530273, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20989225804805756, |
| "loss/reg": 0.15117129683494568, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.00378, |
| "grad_norm": 1.811388611793518, |
| "grad_norm_var": 0.22261347017223437, |
| "learning_rate": 0.1, |
| "loss": 2.8111, |
| "loss/crossentropy": 2.5947234630584717, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21635966002941132, |
| "loss/reg": 0.15109507739543915, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.00379, |
| "grad_norm": 1.8977841138839722, |
| "grad_norm_var": 0.2181650921676635, |
| "learning_rate": 0.1, |
| "loss": 2.9514, |
| "loss/crossentropy": 2.7174222469329834, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.234025239944458, |
| "loss/reg": 0.15127557516098022, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.0038, |
| "grad_norm": 1.775944709777832, |
| "grad_norm_var": 0.02375998105478061, |
| "learning_rate": 0.1, |
| "loss": 2.7812, |
| "loss/crossentropy": 2.5766677856445312, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20452779531478882, |
| "loss/reg": 0.1511276364326477, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.00381, |
| "grad_norm": 1.6730824708938599, |
| "grad_norm_var": 0.02258550488107455, |
| "learning_rate": 0.1, |
| "loss": 2.6852, |
| "loss/crossentropy": 2.47645902633667, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20877960324287415, |
| "loss/reg": 0.15118587017059326, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.00382, |
| "grad_norm": 1.7737575769424438, |
| "grad_norm_var": 0.019725366544345726, |
| "learning_rate": 0.1, |
| "loss": 2.5871, |
| "loss/crossentropy": 2.3758537769317627, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21125558018684387, |
| "loss/reg": 0.15116703510284424, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.00383, |
| "grad_norm": 1.6510947942733765, |
| "grad_norm_var": 0.021420657643567566, |
| "learning_rate": 0.1, |
| "loss": 2.7066, |
| "loss/crossentropy": 2.4972774982452393, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20929165184497833, |
| "loss/reg": 0.15117885172367096, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.00384, |
| "grad_norm": 1.6992582082748413, |
| "grad_norm_var": 0.01951468885309448, |
| "learning_rate": 0.1, |
| "loss": 2.727, |
| "loss/crossentropy": 2.508112668991089, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21886557340621948, |
| "loss/reg": 0.15112744271755219, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.00385, |
| "grad_norm": 1.7897005081176758, |
| "grad_norm_var": 0.013946757521915506, |
| "learning_rate": 0.1, |
| "loss": 2.7238, |
| "loss/crossentropy": 2.4994559288024902, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22430163621902466, |
| "loss/reg": 0.15109151601791382, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.00386, |
| "grad_norm": 1.7458773851394653, |
| "grad_norm_var": 0.013346143788473283, |
| "learning_rate": 0.1, |
| "loss": 2.7609, |
| "loss/crossentropy": 2.5685338973999023, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1923883855342865, |
| "loss/reg": 0.15124017000198364, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.00387, |
| "grad_norm": 2.219231128692627, |
| "grad_norm_var": 0.025489318950901858, |
| "learning_rate": 0.1, |
| "loss": 2.9048, |
| "loss/crossentropy": 2.662940740585327, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24183247983455658, |
| "loss/reg": 0.15118682384490967, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.00388, |
| "grad_norm": 1.885354995727539, |
| "grad_norm_var": 0.025608373941962744, |
| "learning_rate": 0.1, |
| "loss": 3.041, |
| "loss/crossentropy": 2.8259406089782715, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21508657932281494, |
| "loss/reg": 0.15106505155563354, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.00389, |
| "grad_norm": 1.857495665550232, |
| "grad_norm_var": 0.02291330277758637, |
| "learning_rate": 0.1, |
| "loss": 2.7711, |
| "loss/crossentropy": 2.535196542739868, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23586329817771912, |
| "loss/reg": 0.1510740965604782, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.0039, |
| "grad_norm": 1.6724786758422852, |
| "grad_norm_var": 0.024249365860873694, |
| "learning_rate": 0.1, |
| "loss": 2.7789, |
| "loss/crossentropy": 2.5663368701934814, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2125384360551834, |
| "loss/reg": 0.15109118819236755, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.00391, |
| "grad_norm": 2.6903529167175293, |
| "grad_norm_var": 0.07157825257445817, |
| "learning_rate": 0.1, |
| "loss": 2.3173, |
| "loss/crossentropy": 2.0604658126831055, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.25682878494262695, |
| "loss/reg": 0.1511024385690689, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.00392, |
| "grad_norm": 1.8901022672653198, |
| "grad_norm_var": 0.06773662724080692, |
| "learning_rate": 0.1, |
| "loss": 2.8202, |
| "loss/crossentropy": 2.595571994781494, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2246021330356598, |
| "loss/reg": 0.15106593072414398, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.00393, |
| "grad_norm": 1.7739040851593018, |
| "grad_norm_var": 0.06688035299611143, |
| "learning_rate": 0.1, |
| "loss": 2.7813, |
| "loss/crossentropy": 2.566063165664673, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2152460813522339, |
| "loss/reg": 0.15100258588790894, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.00394, |
| "grad_norm": 1.7922471761703491, |
| "grad_norm_var": 0.06703478467787101, |
| "learning_rate": 0.1, |
| "loss": 2.5707, |
| "loss/crossentropy": 2.353325128555298, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2173391878604889, |
| "loss/reg": 0.15109707415103912, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.00395, |
| "grad_norm": 1.8474063873291016, |
| "grad_norm_var": 0.06695122222152937, |
| "learning_rate": 0.1, |
| "loss": 2.7048, |
| "loss/crossentropy": 2.4807169437408447, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22405576705932617, |
| "loss/reg": 0.15113972127437592, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.00396, |
| "grad_norm": 1.8227876424789429, |
| "grad_norm_var": 0.0665722427534499, |
| "learning_rate": 0.1, |
| "loss": 2.759, |
| "loss/crossentropy": 2.5347976684570312, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22420494258403778, |
| "loss/reg": 0.15105532109737396, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.00397, |
| "grad_norm": 1.6536616086959839, |
| "grad_norm_var": 0.06708373466487429, |
| "learning_rate": 0.1, |
| "loss": 2.6341, |
| "loss/crossentropy": 2.4113383293151855, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2227448672056198, |
| "loss/reg": 0.15101544559001923, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.00398, |
| "grad_norm": 1.9241424798965454, |
| "grad_norm_var": 0.06676203250969331, |
| "learning_rate": 0.1, |
| "loss": 2.2804, |
| "loss/crossentropy": 2.0420355796813965, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23833739757537842, |
| "loss/reg": 0.1509397327899933, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.00399, |
| "grad_norm": 1.8979822397232056, |
| "grad_norm_var": 0.06337571736196512, |
| "learning_rate": 0.1, |
| "loss": 2.7958, |
| "loss/crossentropy": 2.5670487880706787, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2287551909685135, |
| "loss/reg": 0.15100519359111786, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 1.9363818168640137, |
| "grad_norm_var": 0.061013521665097416, |
| "learning_rate": 0.1, |
| "loss": 2.7379, |
| "loss/crossentropy": 2.5245933532714844, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21330593526363373, |
| "loss/reg": 0.15094827115535736, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.00401, |
| "grad_norm": 1.795914888381958, |
| "grad_norm_var": 0.06092458917107303, |
| "learning_rate": 0.1, |
| "loss": 2.9236, |
| "loss/crossentropy": 2.7110414505004883, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21257659792900085, |
| "loss/reg": 0.1509622186422348, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.00402, |
| "grad_norm": 2.101243734359741, |
| "grad_norm_var": 0.06149899274136974, |
| "learning_rate": 0.1, |
| "loss": 2.7548, |
| "loss/crossentropy": 2.487922430038452, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2669089734554291, |
| "loss/reg": 0.15110976994037628, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.00403, |
| "grad_norm": 1.8519327640533447, |
| "grad_norm_var": 0.05540100546166915, |
| "learning_rate": 0.1, |
| "loss": 2.6662, |
| "loss/crossentropy": 2.429722785949707, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2364829033613205, |
| "loss/reg": 0.15097306668758392, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.00404, |
| "grad_norm": 1.7932238578796387, |
| "grad_norm_var": 0.05610634068240087, |
| "learning_rate": 0.1, |
| "loss": 2.6978, |
| "loss/crossentropy": 2.47924542427063, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2185240387916565, |
| "loss/reg": 0.15097467601299286, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.00405, |
| "grad_norm": 1.8711391687393188, |
| "grad_norm_var": 0.05605188021955924, |
| "learning_rate": 0.1, |
| "loss": 2.8627, |
| "loss/crossentropy": 2.634406328201294, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22829169034957886, |
| "loss/reg": 0.150837242603302, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.00406, |
| "grad_norm": 1.8737577199935913, |
| "grad_norm_var": 0.05262065947172398, |
| "learning_rate": 0.1, |
| "loss": 2.7406, |
| "loss/crossentropy": 2.5180327892303467, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22260984778404236, |
| "loss/reg": 0.15104174613952637, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.00407, |
| "grad_norm": 1.766380786895752, |
| "grad_norm_var": 0.009504462965704145, |
| "learning_rate": 0.1, |
| "loss": 2.6591, |
| "loss/crossentropy": 2.4351396560668945, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2239249348640442, |
| "loss/reg": 0.15100885927677155, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.00408, |
| "grad_norm": 1.676133394241333, |
| "grad_norm_var": 0.01120790285560241, |
| "learning_rate": 0.1, |
| "loss": 2.6828, |
| "loss/crossentropy": 2.491732120513916, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19106677174568176, |
| "loss/reg": 0.15102165937423706, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.00409, |
| "grad_norm": 1.7344716787338257, |
| "grad_norm_var": 0.011632299859450987, |
| "learning_rate": 0.1, |
| "loss": 2.7485, |
| "loss/crossentropy": 2.518883228302002, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22966621816158295, |
| "loss/reg": 0.1510043442249298, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.0041, |
| "grad_norm": 1.7225534915924072, |
| "grad_norm_var": 0.012320847420900746, |
| "learning_rate": 0.1, |
| "loss": 2.9063, |
| "loss/crossentropy": 2.689192533493042, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21711862087249756, |
| "loss/reg": 0.1509757786989212, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.00411, |
| "grad_norm": 1.9745748043060303, |
| "grad_norm_var": 0.013638260998121095, |
| "learning_rate": 0.1, |
| "loss": 2.707, |
| "loss/crossentropy": 2.490044355392456, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21696624159812927, |
| "loss/reg": 0.1509065479040146, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.00412, |
| "grad_norm": 1.971973180770874, |
| "grad_norm_var": 0.014741254006565658, |
| "learning_rate": 0.1, |
| "loss": 2.8091, |
| "loss/crossentropy": 2.563469171524048, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24558556079864502, |
| "loss/reg": 0.15095914900302887, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.00413, |
| "grad_norm": 1.739637851715088, |
| "grad_norm_var": 0.012991594434782823, |
| "learning_rate": 0.1, |
| "loss": 2.7082, |
| "loss/crossentropy": 2.4907240867614746, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21748635172843933, |
| "loss/reg": 0.1508610099554062, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.00414, |
| "grad_norm": 1.9147230386734009, |
| "grad_norm_var": 0.012906490567419777, |
| "learning_rate": 0.1, |
| "loss": 2.822, |
| "loss/crossentropy": 2.5853893756866455, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23664994537830353, |
| "loss/reg": 0.1509164720773697, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.00415, |
| "grad_norm": 1.8861806392669678, |
| "grad_norm_var": 0.01284185916003177, |
| "learning_rate": 0.1, |
| "loss": 2.9428, |
| "loss/crossentropy": 2.74064564704895, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20214137434959412, |
| "loss/reg": 0.15087473392486572, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.00416, |
| "grad_norm": 1.7048680782318115, |
| "grad_norm_var": 0.01354501806272476, |
| "learning_rate": 0.1, |
| "loss": 2.7708, |
| "loss/crossentropy": 2.554300546646118, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21645048260688782, |
| "loss/reg": 0.150918647646904, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.00417, |
| "grad_norm": 1.6752452850341797, |
| "grad_norm_var": 0.015102754933093366, |
| "learning_rate": 0.1, |
| "loss": 2.8181, |
| "loss/crossentropy": 2.608177661895752, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20995007455348969, |
| "loss/reg": 0.1509387046098709, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.00418, |
| "grad_norm": 2.25610613822937, |
| "grad_norm_var": 0.022230720754456947, |
| "learning_rate": 0.1, |
| "loss": 2.7317, |
| "loss/crossentropy": 2.4656870365142822, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.26603609323501587, |
| "loss/reg": 0.15090647339820862, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.00419, |
| "grad_norm": 1.7643238306045532, |
| "grad_norm_var": 0.022551256037004384, |
| "learning_rate": 0.1, |
| "loss": 2.3337, |
| "loss/crossentropy": 2.11780047416687, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21589842438697815, |
| "loss/reg": 0.1508452594280243, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.0042, |
| "grad_norm": 1.7158069610595703, |
| "grad_norm_var": 0.023334674998600667, |
| "learning_rate": 0.1, |
| "loss": 2.8412, |
| "loss/crossentropy": 2.6472179889678955, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19401872158050537, |
| "loss/reg": 0.15094634890556335, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.00421, |
| "grad_norm": 1.85410475730896, |
| "grad_norm_var": 0.0232548130549579, |
| "learning_rate": 0.1, |
| "loss": 2.821, |
| "loss/crossentropy": 2.578828811645508, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24218684434890747, |
| "loss/reg": 0.1509936898946762, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.00422, |
| "grad_norm": 1.8564954996109009, |
| "grad_norm_var": 0.023165651495764668, |
| "learning_rate": 0.1, |
| "loss": 2.6511, |
| "loss/crossentropy": 2.430964469909668, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22010694444179535, |
| "loss/reg": 0.1510194092988968, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.00423, |
| "grad_norm": 1.5391383171081543, |
| "grad_norm_var": 0.028194916219609685, |
| "learning_rate": 0.1, |
| "loss": 2.2987, |
| "loss/crossentropy": 2.112295389175415, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18641194701194763, |
| "loss/reg": 0.15081174671649933, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.00424, |
| "grad_norm": 1.724223017692566, |
| "grad_norm_var": 0.02747055405852728, |
| "learning_rate": 0.1, |
| "loss": 2.5547, |
| "loss/crossentropy": 2.3416965007781982, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21300387382507324, |
| "loss/reg": 0.15074773132801056, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.00425, |
| "grad_norm": 1.6344752311706543, |
| "grad_norm_var": 0.029164538101110794, |
| "learning_rate": 0.1, |
| "loss": 2.6131, |
| "loss/crossentropy": 2.396984338760376, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2160794585943222, |
| "loss/reg": 0.15076155960559845, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.00426, |
| "grad_norm": 1.74626624584198, |
| "grad_norm_var": 0.028928254614097603, |
| "learning_rate": 0.1, |
| "loss": 2.6302, |
| "loss/crossentropy": 2.4160821437835693, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21413055062294006, |
| "loss/reg": 0.15082821249961853, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.00427, |
| "grad_norm": 1.656304121017456, |
| "grad_norm_var": 0.028270431898971004, |
| "learning_rate": 0.1, |
| "loss": 2.6774, |
| "loss/crossentropy": 2.4710378646850586, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20640170574188232, |
| "loss/reg": 0.15081879496574402, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.00428, |
| "grad_norm": 1.907077670097351, |
| "grad_norm_var": 0.02695901132224563, |
| "learning_rate": 0.1, |
| "loss": 2.7283, |
| "loss/crossentropy": 2.5030508041381836, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2252427637577057, |
| "loss/reg": 0.1508827954530716, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.00429, |
| "grad_norm": 1.6425381898880005, |
| "grad_norm_var": 0.028147688000623283, |
| "learning_rate": 0.1, |
| "loss": 2.707, |
| "loss/crossentropy": 2.4857351779937744, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22125864028930664, |
| "loss/reg": 0.15070487558841705, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.0043, |
| "grad_norm": 1.7558245658874512, |
| "grad_norm_var": 0.02686861746101741, |
| "learning_rate": 0.1, |
| "loss": 2.7412, |
| "loss/crossentropy": 2.5211524963378906, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22008350491523743, |
| "loss/reg": 0.15073436498641968, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.00431, |
| "grad_norm": 1.7740070819854736, |
| "grad_norm_var": 0.025916441567259342, |
| "learning_rate": 0.1, |
| "loss": 2.7119, |
| "loss/crossentropy": 2.4966282844543457, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21523457765579224, |
| "loss/reg": 0.15078803896903992, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.00432, |
| "grad_norm": 2.269322156906128, |
| "grad_norm_var": 0.041460048020039865, |
| "learning_rate": 0.1, |
| "loss": 2.9695, |
| "loss/crossentropy": 2.7718868255615234, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19760069251060486, |
| "loss/reg": 0.15083637833595276, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.00433, |
| "grad_norm": 1.8699089288711548, |
| "grad_norm_var": 0.04063701470830043, |
| "learning_rate": 0.1, |
| "loss": 3.0628, |
| "loss/crossentropy": 2.8523550033569336, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21048828959465027, |
| "loss/reg": 0.15076479315757751, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.00434, |
| "grad_norm": 1.778334140777588, |
| "grad_norm_var": 0.026508956261420677, |
| "learning_rate": 0.1, |
| "loss": 2.502, |
| "loss/crossentropy": 2.2839338779449463, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21806152164936066, |
| "loss/reg": 0.15068462491035461, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.00435, |
| "grad_norm": 1.9787874221801758, |
| "grad_norm_var": 0.0289207914913779, |
| "learning_rate": 0.1, |
| "loss": 3.3528, |
| "loss/crossentropy": 3.1274354457855225, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2253539264202118, |
| "loss/reg": 0.15077903866767883, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.00436, |
| "grad_norm": 2.3234996795654297, |
| "grad_norm_var": 0.045672815906441615, |
| "learning_rate": 0.1, |
| "loss": 2.48, |
| "loss/crossentropy": 2.2315609455108643, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2484637051820755, |
| "loss/reg": 0.1508529931306839, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.00437, |
| "grad_norm": 1.8527600765228271, |
| "grad_norm_var": 0.04566894676693683, |
| "learning_rate": 0.1, |
| "loss": 2.868, |
| "loss/crossentropy": 2.6298837661743164, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23809394240379333, |
| "loss/reg": 0.1508105993270874, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.00438, |
| "grad_norm": 1.7559828758239746, |
| "grad_norm_var": 0.04596954570778985, |
| "learning_rate": 0.1, |
| "loss": 2.443, |
| "loss/crossentropy": 2.225604295730591, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21738675236701965, |
| "loss/reg": 0.15080511569976807, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.00439, |
| "grad_norm": 2.156735897064209, |
| "grad_norm_var": 0.04622556667786875, |
| "learning_rate": 0.1, |
| "loss": 2.5675, |
| "loss/crossentropy": 2.344963788986206, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22257152199745178, |
| "loss/reg": 0.15074650943279266, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.0044, |
| "grad_norm": 1.9818342924118042, |
| "grad_norm_var": 0.04556781112082569, |
| "learning_rate": 0.1, |
| "loss": 2.8998, |
| "loss/crossentropy": 2.677990198135376, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22182998061180115, |
| "loss/reg": 0.15067626535892487, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.00441, |
| "grad_norm": 1.6748995780944824, |
| "grad_norm_var": 0.04434535452248311, |
| "learning_rate": 0.1, |
| "loss": 2.6591, |
| "loss/crossentropy": 2.441425323486328, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21762937307357788, |
| "loss/reg": 0.1507471650838852, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.00442, |
| "grad_norm": 1.847016453742981, |
| "grad_norm_var": 0.04314626212859351, |
| "learning_rate": 0.1, |
| "loss": 2.7675, |
| "loss/crossentropy": 2.5337014198303223, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2337638884782791, |
| "loss/reg": 0.15065504610538483, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.00443, |
| "grad_norm": 1.6504738330841064, |
| "grad_norm_var": 0.04332931831966859, |
| "learning_rate": 0.1, |
| "loss": 2.5161, |
| "loss/crossentropy": 2.3027710914611816, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21334794163703918, |
| "loss/reg": 0.15072591602802277, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.00444, |
| "grad_norm": 1.6157153844833374, |
| "grad_norm_var": 0.04792064713236114, |
| "learning_rate": 0.1, |
| "loss": 2.5919, |
| "loss/crossentropy": 2.382761240005493, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20910042524337769, |
| "loss/reg": 0.15072768926620483, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.00445, |
| "grad_norm": 2.431079626083374, |
| "grad_norm_var": 0.06281771446281799, |
| "learning_rate": 0.1, |
| "loss": 2.6361, |
| "loss/crossentropy": 2.388833999633789, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24729838967323303, |
| "loss/reg": 0.15076467394828796, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.00446, |
| "grad_norm": 1.859398365020752, |
| "grad_norm_var": 0.06122424412511288, |
| "learning_rate": 0.1, |
| "loss": 2.5666, |
| "loss/crossentropy": 2.3410301208496094, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2255995273590088, |
| "loss/reg": 0.15055029094219208, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.00447, |
| "grad_norm": 1.7151768207550049, |
| "grad_norm_var": 0.06263463563082776, |
| "learning_rate": 0.1, |
| "loss": 2.5413, |
| "loss/crossentropy": 2.3206253051757812, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22071394324302673, |
| "loss/reg": 0.15074747800827026, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.00448, |
| "grad_norm": 1.7584843635559082, |
| "grad_norm_var": 0.05532563008642152, |
| "learning_rate": 0.1, |
| "loss": 2.4155, |
| "loss/crossentropy": 2.2007665634155273, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21476951241493225, |
| "loss/reg": 0.15062829852104187, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.00449, |
| "grad_norm": 2.7364625930786133, |
| "grad_norm_var": 0.09986365483561167, |
| "learning_rate": 0.1, |
| "loss": 2.6228, |
| "loss/crossentropy": 2.402888059616089, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21995289623737335, |
| "loss/reg": 0.15081089735031128, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.0045, |
| "grad_norm": 2.3574018478393555, |
| "grad_norm_var": 0.10796921612384545, |
| "learning_rate": 0.1, |
| "loss": 3.0125, |
| "loss/crossentropy": 2.7703285217285156, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24217334389686584, |
| "loss/reg": 0.1507427841424942, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.00451, |
| "grad_norm": 1.6977275609970093, |
| "grad_norm_var": 0.11298861573303398, |
| "learning_rate": 0.1, |
| "loss": 2.4328, |
| "loss/crossentropy": 2.2355422973632812, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19724467396736145, |
| "loss/reg": 0.15060681104660034, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.00452, |
| "grad_norm": 2.03483247756958, |
| "grad_norm_var": 0.10433740014997639, |
| "learning_rate": 0.1, |
| "loss": 2.7943, |
| "loss/crossentropy": 2.584197521209717, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21007749438285828, |
| "loss/reg": 0.1506909430027008, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.00453, |
| "grad_norm": 1.7026461362838745, |
| "grad_norm_var": 0.10759947035987896, |
| "learning_rate": 0.1, |
| "loss": 2.8445, |
| "loss/crossentropy": 2.6189165115356445, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22557061910629272, |
| "loss/reg": 0.15073059499263763, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.00454, |
| "grad_norm": 1.7741223573684692, |
| "grad_norm_var": 0.1071846663731634, |
| "learning_rate": 0.1, |
| "loss": 2.7748, |
| "loss/crossentropy": 2.5624852180480957, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21229374408721924, |
| "loss/reg": 0.1507757306098938, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.00455, |
| "grad_norm": 1.5842708349227905, |
| "grad_norm_var": 0.1109043592521454, |
| "learning_rate": 0.1, |
| "loss": 2.8298, |
| "loss/crossentropy": 2.6428112983703613, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18703559041023254, |
| "loss/reg": 0.1507103443145752, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.00456, |
| "grad_norm": 2.0186939239501953, |
| "grad_norm_var": 0.11138484094866215, |
| "learning_rate": 0.1, |
| "loss": 2.6154, |
| "loss/crossentropy": 2.3989930152893066, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21640828251838684, |
| "loss/reg": 0.15072603523731232, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.00457, |
| "grad_norm": 1.610592007637024, |
| "grad_norm_var": 0.11360469309714834, |
| "learning_rate": 0.1, |
| "loss": 2.5258, |
| "loss/crossentropy": 2.3233416080474854, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2024168074131012, |
| "loss/reg": 0.15062075853347778, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.00458, |
| "grad_norm": 1.7808010578155518, |
| "grad_norm_var": 0.11434324130148994, |
| "learning_rate": 0.1, |
| "loss": 2.5819, |
| "loss/crossentropy": 2.352149248123169, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22971388697624207, |
| "loss/reg": 0.15061435103416443, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.00459, |
| "grad_norm": 1.8864959478378296, |
| "grad_norm_var": 0.11011425212028166, |
| "learning_rate": 0.1, |
| "loss": 2.7855, |
| "loss/crossentropy": 2.5464529991149902, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23908357322216034, |
| "loss/reg": 0.1506984829902649, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.0046, |
| "grad_norm": 2.00264310836792, |
| "grad_norm_var": 0.10427648903603408, |
| "learning_rate": 0.1, |
| "loss": 2.6776, |
| "loss/crossentropy": 2.4802725315093994, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.197281152009964, |
| "loss/reg": 0.15069958567619324, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.00461, |
| "grad_norm": 1.8057889938354492, |
| "grad_norm_var": 0.08730628236937506, |
| "learning_rate": 0.1, |
| "loss": 2.5994, |
| "loss/crossentropy": 2.37688946723938, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22249619662761688, |
| "loss/reg": 0.1506558060646057, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.00462, |
| "grad_norm": 1.7659811973571777, |
| "grad_norm_var": 0.0882994573520096, |
| "learning_rate": 0.1, |
| "loss": 2.7173, |
| "loss/crossentropy": 2.489809513092041, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22747084498405457, |
| "loss/reg": 0.15059794485569, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.00463, |
| "grad_norm": 2.0050625801086426, |
| "grad_norm_var": 0.08681343313072414, |
| "learning_rate": 0.1, |
| "loss": 2.8941, |
| "loss/crossentropy": 2.66090726852417, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2331850826740265, |
| "loss/reg": 0.1508011519908905, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.00464, |
| "grad_norm": 1.771021842956543, |
| "grad_norm_var": 0.08657394365396556, |
| "learning_rate": 0.1, |
| "loss": 2.6964, |
| "loss/crossentropy": 2.48235821723938, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21406123042106628, |
| "loss/reg": 0.1506277620792389, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.00465, |
| "grad_norm": 2.151787757873535, |
| "grad_norm_var": 0.04338695762323918, |
| "learning_rate": 0.1, |
| "loss": 2.8095, |
| "loss/crossentropy": 2.5785462856292725, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23099994659423828, |
| "loss/reg": 0.1506529450416565, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.00466, |
| "grad_norm": 1.7298078536987305, |
| "grad_norm_var": 0.027374916825079747, |
| "learning_rate": 0.1, |
| "loss": 2.539, |
| "loss/crossentropy": 2.3282477855682373, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21078172326087952, |
| "loss/reg": 0.15064556896686554, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.00467, |
| "grad_norm": 1.937821865081787, |
| "grad_norm_var": 0.026658780777643602, |
| "learning_rate": 0.1, |
| "loss": 2.8518, |
| "loss/crossentropy": 2.634305953979492, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.217473566532135, |
| "loss/reg": 0.15058143436908722, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.00468, |
| "grad_norm": 1.6626935005187988, |
| "grad_norm_var": 0.02602643177517662, |
| "learning_rate": 0.1, |
| "loss": 2.6639, |
| "loss/crossentropy": 2.4512810707092285, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21257013082504272, |
| "loss/reg": 0.15062156319618225, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.00469, |
| "grad_norm": 2.0860655307769775, |
| "grad_norm_var": 0.028990751511518434, |
| "learning_rate": 0.1, |
| "loss": 2.8266, |
| "loss/crossentropy": 2.5906851291656494, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23596248030662537, |
| "loss/reg": 0.15065304934978485, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.0047, |
| "grad_norm": 1.8677291870117188, |
| "grad_norm_var": 0.02861192356435911, |
| "learning_rate": 0.1, |
| "loss": 2.7921, |
| "loss/crossentropy": 2.568392753601074, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22374555468559265, |
| "loss/reg": 0.15052491426467896, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.00471, |
| "grad_norm": 1.6846789121627808, |
| "grad_norm_var": 0.02562824462433794, |
| "learning_rate": 0.1, |
| "loss": 2.7454, |
| "loss/crossentropy": 2.533276081085205, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21212953329086304, |
| "loss/reg": 0.15049760043621063, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.00472, |
| "grad_norm": 1.8622286319732666, |
| "grad_norm_var": 0.02385764709813092, |
| "learning_rate": 0.1, |
| "loss": 2.9384, |
| "loss/crossentropy": 2.7142906188964844, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22409212589263916, |
| "loss/reg": 0.1504972279071808, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.00473, |
| "grad_norm": 1.7269498109817505, |
| "grad_norm_var": 0.02097871813139367, |
| "learning_rate": 0.1, |
| "loss": 2.6139, |
| "loss/crossentropy": 2.406529664993286, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2073470950126648, |
| "loss/reg": 0.15070390701293945, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.00474, |
| "grad_norm": 1.7890056371688843, |
| "grad_norm_var": 0.02089850424894332, |
| "learning_rate": 0.1, |
| "loss": 2.7875, |
| "loss/crossentropy": 2.575223445892334, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21232447028160095, |
| "loss/reg": 0.15049099922180176, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.00475, |
| "grad_norm": 1.8399676084518433, |
| "grad_norm_var": 0.02086003684265331, |
| "learning_rate": 0.1, |
| "loss": 2.7111, |
| "loss/crossentropy": 2.4847216606140137, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2263471782207489, |
| "loss/reg": 0.15055295825004578, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.00476, |
| "grad_norm": 1.8152645826339722, |
| "grad_norm_var": 0.019380188745528847, |
| "learning_rate": 0.1, |
| "loss": 2.8008, |
| "loss/crossentropy": 2.5698788166046143, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23094817996025085, |
| "loss/reg": 0.15049852430820465, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.00477, |
| "grad_norm": 2.0760440826416016, |
| "grad_norm_var": 0.022572985911380704, |
| "learning_rate": 0.1, |
| "loss": 2.9553, |
| "loss/crossentropy": 2.689013719558716, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2662630081176758, |
| "loss/reg": 0.15053145587444305, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.00478, |
| "grad_norm": 1.6922051906585693, |
| "grad_norm_var": 0.023845456937084404, |
| "learning_rate": 0.1, |
| "loss": 2.7501, |
| "loss/crossentropy": 2.5473060607910156, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2027662843465805, |
| "loss/reg": 0.15050651133060455, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.00479, |
| "grad_norm": 1.8537479639053345, |
| "grad_norm_var": 0.022272028185081658, |
| "learning_rate": 0.1, |
| "loss": 2.7193, |
| "loss/crossentropy": 2.505497932434082, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21378740668296814, |
| "loss/reg": 0.15051040053367615, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.0048, |
| "grad_norm": 1.5591990947723389, |
| "grad_norm_var": 0.027213395861877862, |
| "learning_rate": 0.1, |
| "loss": 2.6788, |
| "loss/crossentropy": 2.4942548274993896, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18459364771842957, |
| "loss/reg": 0.15059490501880646, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.00481, |
| "grad_norm": 1.6547515392303467, |
| "grad_norm_var": 0.021556977582297538, |
| "learning_rate": 0.1, |
| "loss": 2.6586, |
| "loss/crossentropy": 2.442537784576416, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21609297394752502, |
| "loss/reg": 0.15053291618824005, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.00482, |
| "grad_norm": 2.0321552753448486, |
| "grad_norm_var": 0.024344546054114128, |
| "learning_rate": 0.1, |
| "loss": 2.9309, |
| "loss/crossentropy": 2.6964850425720215, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23440152406692505, |
| "loss/reg": 0.15054738521575928, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.00483, |
| "grad_norm": 1.9001578092575073, |
| "grad_norm_var": 0.023847957715966515, |
| "learning_rate": 0.1, |
| "loss": 2.7603, |
| "loss/crossentropy": 2.5152156352996826, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24511078000068665, |
| "loss/reg": 0.15041695535182953, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.00484, |
| "grad_norm": 1.9443492889404297, |
| "grad_norm_var": 0.022938843395040407, |
| "learning_rate": 0.1, |
| "loss": 3.0713, |
| "loss/crossentropy": 2.81977915763855, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2515028417110443, |
| "loss/reg": 0.1507096290588379, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.00485, |
| "grad_norm": 1.9886903762817383, |
| "grad_norm_var": 0.020291671672334092, |
| "learning_rate": 0.1, |
| "loss": 2.6992, |
| "loss/crossentropy": 2.460703134536743, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2384495735168457, |
| "loss/reg": 0.15053103864192963, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.00486, |
| "grad_norm": 1.7913970947265625, |
| "grad_norm_var": 0.020276372616416852, |
| "learning_rate": 0.1, |
| "loss": 2.8018, |
| "loss/crossentropy": 2.578298568725586, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22348617017269135, |
| "loss/reg": 0.15042859315872192, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.00487, |
| "grad_norm": 1.9218800067901611, |
| "grad_norm_var": 0.01933365225855231, |
| "learning_rate": 0.1, |
| "loss": 2.8225, |
| "loss/crossentropy": 2.6075632572174072, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.214961439371109, |
| "loss/reg": 0.15055601298809052, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.00488, |
| "grad_norm": 1.667251706123352, |
| "grad_norm_var": 0.02114476501842398, |
| "learning_rate": 0.1, |
| "loss": 2.8889, |
| "loss/crossentropy": 2.668334722518921, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22056812047958374, |
| "loss/reg": 0.150565505027771, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.00489, |
| "grad_norm": 1.6461654901504517, |
| "grad_norm_var": 0.022644460296343462, |
| "learning_rate": 0.1, |
| "loss": 2.9462, |
| "loss/crossentropy": 2.7367775440216064, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20938341319561005, |
| "loss/reg": 0.15044718980789185, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.0049, |
| "grad_norm": 1.9964977502822876, |
| "grad_norm_var": 0.024387477555111482, |
| "learning_rate": 0.1, |
| "loss": 2.5424, |
| "loss/crossentropy": 2.287998914718628, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2543773055076599, |
| "loss/reg": 0.15040628612041473, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.00491, |
| "grad_norm": 1.64400315284729, |
| "grad_norm_var": 0.026690021600673457, |
| "learning_rate": 0.1, |
| "loss": 2.7833, |
| "loss/crossentropy": 2.5708346366882324, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2124616503715515, |
| "loss/reg": 0.15043997764587402, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.00492, |
| "grad_norm": 1.768173098564148, |
| "grad_norm_var": 0.02688337657083532, |
| "learning_rate": 0.1, |
| "loss": 2.5367, |
| "loss/crossentropy": 2.329653739929199, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20699647068977356, |
| "loss/reg": 0.1505696028470993, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.00493, |
| "grad_norm": 2.027277708053589, |
| "grad_norm_var": 0.025373939997318948, |
| "learning_rate": 0.1, |
| "loss": 2.8643, |
| "loss/crossentropy": 2.6274173259735107, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.236917644739151, |
| "loss/reg": 0.1506776213645935, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.00494, |
| "grad_norm": 2.001610279083252, |
| "grad_norm_var": 0.026167870019287182, |
| "learning_rate": 0.1, |
| "loss": 3.0603, |
| "loss/crossentropy": 2.8126137256622314, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24769023060798645, |
| "loss/reg": 0.15049690008163452, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.00495, |
| "grad_norm": 1.7624340057373047, |
| "grad_norm_var": 0.026489139079841035, |
| "learning_rate": 0.1, |
| "loss": 2.8348, |
| "loss/crossentropy": 2.5987489223480225, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23609665036201477, |
| "loss/reg": 0.15056881308555603, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.00496, |
| "grad_norm": 1.6690481901168823, |
| "grad_norm_var": 0.02325322278794578, |
| "learning_rate": 0.1, |
| "loss": 2.7026, |
| "loss/crossentropy": 2.495326519012451, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20731309056282043, |
| "loss/reg": 0.1504705250263214, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.00497, |
| "grad_norm": 1.5508745908737183, |
| "grad_norm_var": 0.026472451875488842, |
| "learning_rate": 0.1, |
| "loss": 2.5858, |
| "loss/crossentropy": 2.3849008083343506, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20085099339485168, |
| "loss/reg": 0.15054410696029663, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.00498, |
| "grad_norm": 2.0897293090820312, |
| "grad_norm_var": 0.028216140901402238, |
| "learning_rate": 0.1, |
| "loss": 2.6481, |
| "loss/crossentropy": 2.4339215755462646, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21415017545223236, |
| "loss/reg": 0.1504308432340622, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.00499, |
| "grad_norm": 1.9118894338607788, |
| "grad_norm_var": 0.028325731117482118, |
| "learning_rate": 0.1, |
| "loss": 1.7948, |
| "loss/crossentropy": 1.5877398252487183, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20707426965236664, |
| "loss/reg": 0.15049812197685242, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.005, |
| "grad_norm": 1.9073582887649536, |
| "grad_norm_var": 0.027878483834308316, |
| "learning_rate": 0.1, |
| "loss": 2.4694, |
| "loss/crossentropy": 2.2375547885894775, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23181632161140442, |
| "loss/reg": 0.15049485862255096, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.00501, |
| "grad_norm": 1.7581777572631836, |
| "grad_norm_var": 0.026445615672309703, |
| "learning_rate": 0.1, |
| "loss": 2.8299, |
| "loss/crossentropy": 2.6286065578460693, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20131340622901917, |
| "loss/reg": 0.15045808255672455, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.00502, |
| "grad_norm": 1.7973214387893677, |
| "grad_norm_var": 0.02642552317222305, |
| "learning_rate": 0.1, |
| "loss": 2.9078, |
| "loss/crossentropy": 2.6889216899871826, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21888136863708496, |
| "loss/reg": 0.15061047673225403, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.00503, |
| "grad_norm": 1.714178442955017, |
| "grad_norm_var": 0.026299818423801944, |
| "learning_rate": 0.1, |
| "loss": 2.8288, |
| "loss/crossentropy": 2.6132264137268066, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21561884880065918, |
| "loss/reg": 0.15062540769577026, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.00504, |
| "grad_norm": 1.7422972917556763, |
| "grad_norm_var": 0.02525348193026673, |
| "learning_rate": 0.1, |
| "loss": 2.9666, |
| "loss/crossentropy": 2.7446231842041016, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22200658917427063, |
| "loss/reg": 0.15050289034843445, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.00505, |
| "grad_norm": 1.706626057624817, |
| "grad_norm_var": 0.024147590571504906, |
| "learning_rate": 0.1, |
| "loss": 2.6177, |
| "loss/crossentropy": 2.4087107181549072, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20898595452308655, |
| "loss/reg": 0.15063311159610748, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.00506, |
| "grad_norm": 1.723594069480896, |
| "grad_norm_var": 0.022215228736863678, |
| "learning_rate": 0.1, |
| "loss": 2.7098, |
| "loss/crossentropy": 2.4942219257354736, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2155703604221344, |
| "loss/reg": 0.1503981500864029, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.00507, |
| "grad_norm": 1.6111392974853516, |
| "grad_norm_var": 0.022959327106602246, |
| "learning_rate": 0.1, |
| "loss": 2.6213, |
| "loss/crossentropy": 2.4107608795166016, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21055349707603455, |
| "loss/reg": 0.15056003630161285, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.00508, |
| "grad_norm": 1.854455828666687, |
| "grad_norm_var": 0.023100371179813804, |
| "learning_rate": 0.1, |
| "loss": 3.1002, |
| "loss/crossentropy": 2.8853721618652344, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21486949920654297, |
| "loss/reg": 0.1504994034767151, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.00509, |
| "grad_norm": 1.7393577098846436, |
| "grad_norm_var": 0.019623661975633883, |
| "learning_rate": 0.1, |
| "loss": 2.8406, |
| "loss/crossentropy": 2.6134026050567627, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22720998525619507, |
| "loss/reg": 0.1505071520805359, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.0051, |
| "grad_norm": 2.150702476501465, |
| "grad_norm_var": 0.025343663588946203, |
| "learning_rate": 0.1, |
| "loss": 2.3789, |
| "loss/crossentropy": 2.1428346633911133, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23604287207126617, |
| "loss/reg": 0.15055438876152039, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.00511, |
| "grad_norm": 1.7210215330123901, |
| "grad_norm_var": 0.025620034433750462, |
| "learning_rate": 0.1, |
| "loss": 2.564, |
| "loss/crossentropy": 2.3474836349487305, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2165040224790573, |
| "loss/reg": 0.15048184990882874, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.00512, |
| "grad_norm": 1.7947207689285278, |
| "grad_norm_var": 0.024572285038944545, |
| "learning_rate": 0.1, |
| "loss": 2.7962, |
| "loss/crossentropy": 2.58736515045166, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20883789658546448, |
| "loss/reg": 0.15055420994758606, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.00513, |
| "grad_norm": 1.7811931371688843, |
| "grad_norm_var": 0.02028824157179129, |
| "learning_rate": 0.1, |
| "loss": 2.5543, |
| "loss/crossentropy": 2.3294026851654053, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22489222884178162, |
| "loss/reg": 0.1505099982023239, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.00514, |
| "grad_norm": 1.79312264919281, |
| "grad_norm_var": 0.014832270435988448, |
| "learning_rate": 0.1, |
| "loss": 2.847, |
| "loss/crossentropy": 2.619175434112549, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22777985036373138, |
| "loss/reg": 0.1505488157272339, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.00515, |
| "grad_norm": 1.794277310371399, |
| "grad_norm_var": 0.013851205149933083, |
| "learning_rate": 0.1, |
| "loss": 2.836, |
| "loss/crossentropy": 2.612422227859497, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22354549169540405, |
| "loss/reg": 0.15048745274543762, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.00516, |
| "grad_norm": 1.890990972518921, |
| "grad_norm_var": 0.01360495428181275, |
| "learning_rate": 0.1, |
| "loss": 2.7802, |
| "loss/crossentropy": 2.551600456237793, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22858263552188873, |
| "loss/reg": 0.15042901039123535, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.00517, |
| "grad_norm": 1.7929797172546387, |
| "grad_norm_var": 0.013552369121918121, |
| "learning_rate": 0.1, |
| "loss": 2.6871, |
| "loss/crossentropy": 2.4573912620544434, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22968189418315887, |
| "loss/reg": 0.15041229128837585, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.00518, |
| "grad_norm": 1.7669342756271362, |
| "grad_norm_var": 0.013572308014728604, |
| "learning_rate": 0.1, |
| "loss": 2.5649, |
| "loss/crossentropy": 2.3470799922943115, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2178421914577484, |
| "loss/reg": 0.15042619407176971, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.00519, |
| "grad_norm": 1.8683443069458008, |
| "grad_norm_var": 0.013579383622504555, |
| "learning_rate": 0.1, |
| "loss": 2.4593, |
| "loss/crossentropy": 2.2550268173217773, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.204318106174469, |
| "loss/reg": 0.15036290884017944, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.0052, |
| "grad_norm": 1.9807769060134888, |
| "grad_norm_var": 0.015434747812597748, |
| "learning_rate": 0.1, |
| "loss": 2.3579, |
| "loss/crossentropy": 2.111980438232422, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24588754773139954, |
| "loss/reg": 0.15051668882369995, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.00521, |
| "grad_norm": 1.7604345083236694, |
| "grad_norm_var": 0.01486946460703867, |
| "learning_rate": 0.1, |
| "loss": 2.5136, |
| "loss/crossentropy": 2.282618761062622, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23095154762268066, |
| "loss/reg": 0.1504744440317154, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.00522, |
| "grad_norm": 2.045517683029175, |
| "grad_norm_var": 0.017466011263331836, |
| "learning_rate": 0.1, |
| "loss": 2.8904, |
| "loss/crossentropy": 2.655665874481201, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23476678133010864, |
| "loss/reg": 0.15052157640457153, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.00523, |
| "grad_norm": 1.9401181936264038, |
| "grad_norm_var": 0.014449278623527325, |
| "learning_rate": 0.1, |
| "loss": 2.5561, |
| "loss/crossentropy": 2.320192337036133, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23591549694538116, |
| "loss/reg": 0.15058889985084534, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.00524, |
| "grad_norm": 1.5600680112838745, |
| "grad_norm_var": 0.01987475618392954, |
| "learning_rate": 0.1, |
| "loss": 2.8013, |
| "loss/crossentropy": 2.61422061920166, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18709568679332733, |
| "loss/reg": 0.1504358947277069, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.00525, |
| "grad_norm": 2.0901033878326416, |
| "grad_norm_var": 0.02303075346855484, |
| "learning_rate": 0.1, |
| "loss": 3.1315, |
| "loss/crossentropy": 2.916527032852173, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21501925587654114, |
| "loss/reg": 0.15061353147029877, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.00526, |
| "grad_norm": 1.85203218460083, |
| "grad_norm_var": 0.016958023338743782, |
| "learning_rate": 0.1, |
| "loss": 3.2598, |
| "loss/crossentropy": 3.0189459323883057, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24083559215068817, |
| "loss/reg": 0.15049102902412415, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.00527, |
| "grad_norm": 1.9515986442565918, |
| "grad_norm_var": 0.0166372085622602, |
| "learning_rate": 0.1, |
| "loss": 2.9413, |
| "loss/crossentropy": 2.684509038925171, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2568155527114868, |
| "loss/reg": 0.15047329664230347, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.00528, |
| "grad_norm": 1.6318988800048828, |
| "grad_norm_var": 0.01958000324307984, |
| "learning_rate": 0.1, |
| "loss": 2.6457, |
| "loss/crossentropy": 2.4374656677246094, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20827874541282654, |
| "loss/reg": 0.1503596305847168, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.00529, |
| "grad_norm": 1.8789552450180054, |
| "grad_norm_var": 0.01936159881259769, |
| "learning_rate": 0.1, |
| "loss": 2.5864, |
| "loss/crossentropy": 2.3753092288970947, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21107447147369385, |
| "loss/reg": 0.1505071073770523, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.0053, |
| "grad_norm": 1.7717182636260986, |
| "grad_norm_var": 0.019552226873445912, |
| "learning_rate": 0.1, |
| "loss": 2.7842, |
| "loss/crossentropy": 2.5483057498931885, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2358730286359787, |
| "loss/reg": 0.15051117539405823, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.00531, |
| "grad_norm": 1.7700597047805786, |
| "grad_norm_var": 0.019764119533294357, |
| "learning_rate": 0.1, |
| "loss": 2.6906, |
| "loss/crossentropy": 2.474663496017456, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21591679751873016, |
| "loss/reg": 0.15043498575687408, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.00532, |
| "grad_norm": 1.7033973932266235, |
| "grad_norm_var": 0.02086408569283454, |
| "learning_rate": 0.1, |
| "loss": 2.5116, |
| "loss/crossentropy": 2.288952350616455, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22266441583633423, |
| "loss/reg": 0.15041318535804749, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.00533, |
| "grad_norm": 1.7243086099624634, |
| "grad_norm_var": 0.02154638758513029, |
| "learning_rate": 0.1, |
| "loss": 2.7693, |
| "loss/crossentropy": 2.555543899536133, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21376197040081024, |
| "loss/reg": 0.1504937708377838, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.00534, |
| "grad_norm": 1.7111022472381592, |
| "grad_norm_var": 0.022218259979375907, |
| "learning_rate": 0.1, |
| "loss": 2.6313, |
| "loss/crossentropy": 2.4210214614868164, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2102821320295334, |
| "loss/reg": 0.15053224563598633, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.00535, |
| "grad_norm": 1.6556938886642456, |
| "grad_norm_var": 0.023887217363082826, |
| "learning_rate": 0.1, |
| "loss": 2.7054, |
| "loss/crossentropy": 2.501344919204712, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20406366884708405, |
| "loss/reg": 0.15040111541748047, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.00536, |
| "grad_norm": 1.9426474571228027, |
| "grad_norm_var": 0.02313140402708669, |
| "learning_rate": 0.1, |
| "loss": 2.4385, |
| "loss/crossentropy": 2.1744213104248047, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.26412445306777954, |
| "loss/reg": 0.15035146474838257, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.00537, |
| "grad_norm": 1.8905344009399414, |
| "grad_norm_var": 0.023297332373288702, |
| "learning_rate": 0.1, |
| "loss": 2.4954, |
| "loss/crossentropy": 2.2780075073242188, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21742412447929382, |
| "loss/reg": 0.15048402547836304, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.00538, |
| "grad_norm": 1.6335569620132446, |
| "grad_norm_var": 0.021516208779054816, |
| "learning_rate": 0.1, |
| "loss": 2.7993, |
| "loss/crossentropy": 2.6101813316345215, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18907564878463745, |
| "loss/reg": 0.15043196082115173, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.00539, |
| "grad_norm": 1.873996376991272, |
| "grad_norm_var": 0.02050334156799162, |
| "learning_rate": 0.1, |
| "loss": 2.8086, |
| "loss/crossentropy": 2.5773746967315674, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23124699294567108, |
| "loss/reg": 0.15045055747032166, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.0054, |
| "grad_norm": 1.713912844657898, |
| "grad_norm_var": 0.017263949850722802, |
| "learning_rate": 0.1, |
| "loss": 2.8302, |
| "loss/crossentropy": 2.5992825031280518, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23093578219413757, |
| "loss/reg": 0.15050871670246124, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.00541, |
| "grad_norm": 1.9205272197723389, |
| "grad_norm_var": 0.012495586066715228, |
| "learning_rate": 0.1, |
| "loss": 2.7171, |
| "loss/crossentropy": 2.48820424079895, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22887533903121948, |
| "loss/reg": 0.1504470407962799, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.00542, |
| "grad_norm": 5.880000591278076, |
| "grad_norm_var": 1.0603157689875313, |
| "learning_rate": 0.1, |
| "loss": 4.1246, |
| "loss/crossentropy": 3.714735746383667, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.4098797142505646, |
| "loss/reg": 0.15040406584739685, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.00543, |
| "grad_norm": 3.8511059284210205, |
| "grad_norm_var": 1.2632143971518586, |
| "learning_rate": 0.1, |
| "loss": 2.7998, |
| "loss/crossentropy": 2.5449881553649902, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2547682523727417, |
| "loss/reg": 0.15046574175357819, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.00544, |
| "grad_norm": 1.7639555931091309, |
| "grad_norm_var": 1.2550130055365682, |
| "learning_rate": 0.1, |
| "loss": 2.737, |
| "loss/crossentropy": 2.530305862426758, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2067255675792694, |
| "loss/reg": 0.15053707361221313, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.00545, |
| "grad_norm": 1.8482774496078491, |
| "grad_norm_var": 1.2562534806702197, |
| "learning_rate": 0.1, |
| "loss": 2.7863, |
| "loss/crossentropy": 2.5641446113586426, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22215618193149567, |
| "loss/reg": 0.15041276812553406, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.00546, |
| "grad_norm": 3.74896240234375, |
| "grad_norm_var": 1.3966712353329134, |
| "learning_rate": 0.1, |
| "loss": 3.0849, |
| "loss/crossentropy": 2.7615413665771484, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.32336968183517456, |
| "loss/reg": 0.15052559971809387, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.00547, |
| "grad_norm": 1.8097996711730957, |
| "grad_norm_var": 1.394017587802263, |
| "learning_rate": 0.1, |
| "loss": 2.8108, |
| "loss/crossentropy": 2.5829594135284424, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22784125804901123, |
| "loss/reg": 0.15050798654556274, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.00548, |
| "grad_norm": 1.7007442712783813, |
| "grad_norm_var": 1.3942262407905517, |
| "learning_rate": 0.1, |
| "loss": 2.6086, |
| "loss/crossentropy": 2.3981332778930664, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21043838560581207, |
| "loss/reg": 0.15042629837989807, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.00549, |
| "grad_norm": 1.7262417078018188, |
| "grad_norm_var": 1.394080200243891, |
| "learning_rate": 0.1, |
| "loss": 2.8885, |
| "loss/crossentropy": 2.6797494888305664, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20875097811222076, |
| "loss/reg": 0.1504560112953186, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.0055, |
| "grad_norm": 1.8690731525421143, |
| "grad_norm_var": 1.3834057885630158, |
| "learning_rate": 0.1, |
| "loss": 2.5765, |
| "loss/crossentropy": 2.347946882247925, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2285146713256836, |
| "loss/reg": 0.1504521369934082, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.00551, |
| "grad_norm": 1.573973298072815, |
| "grad_norm_var": 1.3908633591381048, |
| "learning_rate": 0.1, |
| "loss": 2.6205, |
| "loss/crossentropy": 2.4217123985290527, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19874882698059082, |
| "loss/reg": 0.15051519870758057, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.00552, |
| "grad_norm": 1.7464731931686401, |
| "grad_norm_var": 1.4025296088293848, |
| "learning_rate": 0.1, |
| "loss": 2.6228, |
| "loss/crossentropy": 2.411882162094116, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21088907122612, |
| "loss/reg": 0.15038831532001495, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.00553, |
| "grad_norm": 2.1128478050231934, |
| "grad_norm_var": 1.3939423198678347, |
| "learning_rate": 0.1, |
| "loss": 2.7604, |
| "loss/crossentropy": 2.5426371097564697, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21778763830661774, |
| "loss/reg": 0.15037667751312256, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.00554, |
| "grad_norm": 1.720565676689148, |
| "grad_norm_var": 1.386703215315498, |
| "learning_rate": 0.1, |
| "loss": 2.5825, |
| "loss/crossentropy": 2.3651721477508545, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21728430688381195, |
| "loss/reg": 0.15038026869297028, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.00555, |
| "grad_norm": 2.273252010345459, |
| "grad_norm_var": 1.3737869698507226, |
| "learning_rate": 0.1, |
| "loss": 2.7941, |
| "loss/crossentropy": 2.5497400760650635, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24432900547981262, |
| "loss/reg": 0.15059784054756165, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.00556, |
| "grad_norm": 1.76582670211792, |
| "grad_norm_var": 1.3696997255027419, |
| "learning_rate": 0.1, |
| "loss": 2.7634, |
| "loss/crossentropy": 2.5483694076538086, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2150714099407196, |
| "loss/reg": 0.15039324760437012, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.00557, |
| "grad_norm": 1.673180341720581, |
| "grad_norm_var": 1.3870929368841167, |
| "learning_rate": 0.1, |
| "loss": 2.7544, |
| "loss/crossentropy": 2.547147512435913, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20727895200252533, |
| "loss/reg": 0.1503019779920578, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.00558, |
| "grad_norm": 1.6230961084365845, |
| "grad_norm_var": 0.49708254616439684, |
| "learning_rate": 0.1, |
| "loss": 2.7736, |
| "loss/crossentropy": 2.5475351810455322, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22604487836360931, |
| "loss/reg": 0.15050086379051208, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.00559, |
| "grad_norm": 1.6277203559875488, |
| "grad_norm_var": 0.27224401882224397, |
| "learning_rate": 0.1, |
| "loss": 2.9473, |
| "loss/crossentropy": 2.7350268363952637, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21227481961250305, |
| "loss/reg": 0.1504877656698227, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.0056, |
| "grad_norm": 1.794036865234375, |
| "grad_norm_var": 0.2717088001170651, |
| "learning_rate": 0.1, |
| "loss": 2.6761, |
| "loss/crossentropy": 2.4449384212493896, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23115184903144836, |
| "loss/reg": 0.1504851132631302, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.00561, |
| "grad_norm": 2.073359251022339, |
| "grad_norm_var": 0.27292139398368914, |
| "learning_rate": 0.1, |
| "loss": 2.6494, |
| "loss/crossentropy": 2.412386894226074, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23698602616786957, |
| "loss/reg": 0.15044769644737244, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.00562, |
| "grad_norm": 1.7962262630462646, |
| "grad_norm_var": 0.03698653181104016, |
| "learning_rate": 0.1, |
| "loss": 2.6075, |
| "loss/crossentropy": 2.385498523712158, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22196310758590698, |
| "loss/reg": 0.15035328269004822, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.00563, |
| "grad_norm": 1.7103153467178345, |
| "grad_norm_var": 0.037546756534611286, |
| "learning_rate": 0.1, |
| "loss": 2.7383, |
| "loss/crossentropy": 2.5194432735443115, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21885497868061066, |
| "loss/reg": 0.15037952363491058, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.00564, |
| "grad_norm": 1.7675158977508545, |
| "grad_norm_var": 0.03694901870932673, |
| "learning_rate": 0.1, |
| "loss": 2.649, |
| "loss/crossentropy": 2.4430155754089355, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20593670010566711, |
| "loss/reg": 0.15050162374973297, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.00565, |
| "grad_norm": 1.8520102500915527, |
| "grad_norm_var": 0.03664447795700827, |
| "learning_rate": 0.1, |
| "loss": 2.9201, |
| "loss/crossentropy": 2.6966400146484375, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22341476380825043, |
| "loss/reg": 0.1504211276769638, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.00566, |
| "grad_norm": 2.1652166843414307, |
| "grad_norm_var": 0.04441028535624927, |
| "learning_rate": 0.1, |
| "loss": 2.6703, |
| "loss/crossentropy": 2.426243543624878, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2440837025642395, |
| "loss/reg": 0.15044492483139038, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.00567, |
| "grad_norm": 1.9409525394439697, |
| "grad_norm_var": 0.04031327109130795, |
| "learning_rate": 0.1, |
| "loss": 3.2795, |
| "loss/crossentropy": 3.059960126876831, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21957780420780182, |
| "loss/reg": 0.150351881980896, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.00568, |
| "grad_norm": 1.8442153930664062, |
| "grad_norm_var": 0.039526480835273915, |
| "learning_rate": 0.1, |
| "loss": 2.7309, |
| "loss/crossentropy": 2.512481212615967, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21841123700141907, |
| "loss/reg": 0.15043720602989197, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.00569, |
| "grad_norm": 1.6993646621704102, |
| "grad_norm_var": 0.03620447519628976, |
| "learning_rate": 0.1, |
| "loss": 2.8478, |
| "loss/crossentropy": 2.633608818054199, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21415984630584717, |
| "loss/reg": 0.15044939517974854, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.0057, |
| "grad_norm": 1.7635753154754639, |
| "grad_norm_var": 0.03567573217679201, |
| "learning_rate": 0.1, |
| "loss": 2.8673, |
| "loss/crossentropy": 2.6511528491973877, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21612593531608582, |
| "loss/reg": 0.15037575364112854, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.00571, |
| "grad_norm": 1.6693429946899414, |
| "grad_norm_var": 0.023230925941438347, |
| "learning_rate": 0.1, |
| "loss": 2.8964, |
| "loss/crossentropy": 2.6793594360351562, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2170218825340271, |
| "loss/reg": 0.15024608373641968, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.00572, |
| "grad_norm": 1.7233006954193115, |
| "grad_norm_var": 0.023525656962599582, |
| "learning_rate": 0.1, |
| "loss": 2.82, |
| "loss/crossentropy": 2.6005797386169434, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21943430602550507, |
| "loss/reg": 0.1502731442451477, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.00573, |
| "grad_norm": 1.7672616243362427, |
| "grad_norm_var": 0.022548047474856916, |
| "learning_rate": 0.1, |
| "loss": 2.731, |
| "loss/crossentropy": 2.4993269443511963, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2316977083683014, |
| "loss/reg": 0.15043354034423828, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.00574, |
| "grad_norm": 1.8040755987167358, |
| "grad_norm_var": 0.020299940855419092, |
| "learning_rate": 0.1, |
| "loss": 2.8862, |
| "loss/crossentropy": 2.677408456802368, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2087765485048294, |
| "loss/reg": 0.15053480863571167, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.00575, |
| "grad_norm": 1.7925748825073242, |
| "grad_norm_var": 0.017939010971055738, |
| "learning_rate": 0.1, |
| "loss": 2.5294, |
| "loss/crossentropy": 2.308434247970581, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2210111767053604, |
| "loss/reg": 0.15036532282829285, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.00576, |
| "grad_norm": 1.6256167888641357, |
| "grad_norm_var": 0.02035570572466406, |
| "learning_rate": 0.1, |
| "loss": 2.8715, |
| "loss/crossentropy": 2.660294532775879, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21120679378509521, |
| "loss/reg": 0.15040796995162964, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.00577, |
| "grad_norm": 1.888875961303711, |
| "grad_norm_var": 0.016058476260646572, |
| "learning_rate": 0.1, |
| "loss": 2.886, |
| "loss/crossentropy": 2.6420791149139404, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2438793033361435, |
| "loss/reg": 0.15039470791816711, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.00578, |
| "grad_norm": 1.8073621988296509, |
| "grad_norm_var": 0.01605965470519829, |
| "learning_rate": 0.1, |
| "loss": 3.0179, |
| "loss/crossentropy": 2.768024206161499, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24985936284065247, |
| "loss/reg": 0.15042048692703247, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.00579, |
| "grad_norm": 1.7833328247070312, |
| "grad_norm_var": 0.015506608034829068, |
| "learning_rate": 0.1, |
| "loss": 2.5905, |
| "loss/crossentropy": 2.369215488433838, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2212715446949005, |
| "loss/reg": 0.15054431557655334, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.0058, |
| "grad_norm": 1.72854483127594, |
| "grad_norm_var": 0.01580104189404968, |
| "learning_rate": 0.1, |
| "loss": 2.7857, |
| "loss/crossentropy": 2.5796985626220703, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20600803196430206, |
| "loss/reg": 0.15038326382637024, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.00581, |
| "grad_norm": 1.7399742603302002, |
| "grad_norm_var": 0.015860541563214526, |
| "learning_rate": 0.1, |
| "loss": 2.625, |
| "loss/crossentropy": 2.4126365184783936, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2123544067144394, |
| "loss/reg": 0.15032103657722473, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.00582, |
| "grad_norm": 1.7021970748901367, |
| "grad_norm_var": 0.006495072386586761, |
| "learning_rate": 0.1, |
| "loss": 2.8773, |
| "loss/crossentropy": 2.66082501411438, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21650926768779755, |
| "loss/reg": 0.15046095848083496, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.00583, |
| "grad_norm": 1.7088143825531006, |
| "grad_norm_var": 0.00449551785662455, |
| "learning_rate": 0.1, |
| "loss": 2.5909, |
| "loss/crossentropy": 2.3720316886901855, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21885687112808228, |
| "loss/reg": 0.15049117803573608, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.00584, |
| "grad_norm": 1.7506239414215088, |
| "grad_norm_var": 0.0039050486764416804, |
| "learning_rate": 0.1, |
| "loss": 2.6865, |
| "loss/crossentropy": 2.4840197563171387, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2024995982646942, |
| "loss/reg": 0.15041285753250122, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.00585, |
| "grad_norm": 1.7845115661621094, |
| "grad_norm_var": 0.0038153594243659937, |
| "learning_rate": 0.1, |
| "loss": 2.6706, |
| "loss/crossentropy": 2.4638872146606445, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2067214995622635, |
| "loss/reg": 0.1505003273487091, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.00586, |
| "grad_norm": 1.6626255512237549, |
| "grad_norm_var": 0.004303201789654309, |
| "learning_rate": 0.1, |
| "loss": 2.8504, |
| "loss/crossentropy": 2.6219983100891113, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22836509346961975, |
| "loss/reg": 0.15043099224567413, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.00587, |
| "grad_norm": 1.7280631065368652, |
| "grad_norm_var": 0.003917045404303347, |
| "learning_rate": 0.1, |
| "loss": 2.8777, |
| "loss/crossentropy": 2.6507773399353027, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22695446014404297, |
| "loss/reg": 0.15051300823688507, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.00588, |
| "grad_norm": 1.894371509552002, |
| "grad_norm_var": 0.005140325671326688, |
| "learning_rate": 0.1, |
| "loss": 2.662, |
| "loss/crossentropy": 2.4504082202911377, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21154916286468506, |
| "loss/reg": 0.15039029717445374, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.00589, |
| "grad_norm": 1.6788150072097778, |
| "grad_norm_var": 0.005550120770173332, |
| "learning_rate": 0.1, |
| "loss": 2.2341, |
| "loss/crossentropy": 2.0276424884796143, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20649614930152893, |
| "loss/reg": 0.15035010874271393, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.0059, |
| "grad_norm": 1.766792893409729, |
| "grad_norm_var": 0.005393157534963914, |
| "learning_rate": 0.1, |
| "loss": 2.6975, |
| "loss/crossentropy": 2.480642795562744, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21682170033454895, |
| "loss/reg": 0.15042726695537567, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.00591, |
| "grad_norm": 1.7550671100616455, |
| "grad_norm_var": 0.005281636598468253, |
| "learning_rate": 0.1, |
| "loss": 2.5317, |
| "loss/crossentropy": 2.3306806087493896, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20105496048927307, |
| "loss/reg": 0.15039075911045074, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.00592, |
| "grad_norm": 1.8167235851287842, |
| "grad_norm_var": 0.004385951827918196, |
| "learning_rate": 0.1, |
| "loss": 2.7743, |
| "loss/crossentropy": 2.549349546432495, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22498971223831177, |
| "loss/reg": 0.15043842792510986, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.00593, |
| "grad_norm": 1.675256609916687, |
| "grad_norm_var": 0.0036326330814161994, |
| "learning_rate": 0.1, |
| "loss": 2.742, |
| "loss/crossentropy": 2.541952133178711, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20006856322288513, |
| "loss/reg": 0.15048488974571228, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.00594, |
| "grad_norm": 1.7346447706222534, |
| "grad_norm_var": 0.0033967025875497234, |
| "learning_rate": 0.1, |
| "loss": 2.6734, |
| "loss/crossentropy": 2.44462251663208, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2287890762090683, |
| "loss/reg": 0.1504419445991516, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.00595, |
| "grad_norm": 2.064833164215088, |
| "grad_norm_var": 0.009810731729368112, |
| "learning_rate": 0.1, |
| "loss": 2.8264, |
| "loss/crossentropy": 2.6124074459075928, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21400804817676544, |
| "loss/reg": 0.15046687424182892, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.00596, |
| "grad_norm": 1.8686548471450806, |
| "grad_norm_var": 0.010412834737775196, |
| "learning_rate": 0.1, |
| "loss": 2.8296, |
| "loss/crossentropy": 2.5996577739715576, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22993618249893188, |
| "loss/reg": 0.15041178464889526, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.00597, |
| "grad_norm": 1.7548919916152954, |
| "grad_norm_var": 0.010365533318138394, |
| "learning_rate": 0.1, |
| "loss": 2.6797, |
| "loss/crossentropy": 2.450674295425415, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22902154922485352, |
| "loss/reg": 0.15047100186347961, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.00598, |
| "grad_norm": 1.6803902387619019, |
| "grad_norm_var": 0.010597282776743515, |
| "learning_rate": 0.1, |
| "loss": 2.697, |
| "loss/crossentropy": 2.4876456260681152, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20940245687961578, |
| "loss/reg": 0.150425985455513, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.00599, |
| "grad_norm": 1.729906678199768, |
| "grad_norm_var": 0.010452122445273205, |
| "learning_rate": 0.1, |
| "loss": 2.5444, |
| "loss/crossentropy": 2.3213446140289307, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22300857305526733, |
| "loss/reg": 0.1504848748445511, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 1.9423574209213257, |
| "grad_norm_var": 0.01221257385764094, |
| "learning_rate": 0.1, |
| "loss": 2.7494, |
| "loss/crossentropy": 2.5295703411102295, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21978840231895447, |
| "loss/reg": 0.1504577398300171, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.00601, |
| "grad_norm": 1.630698800086975, |
| "grad_norm_var": 0.013672919348090317, |
| "learning_rate": 0.1, |
| "loss": 2.7253, |
| "loss/crossentropy": 2.5204498767852783, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20483750104904175, |
| "loss/reg": 0.15046261250972748, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.00602, |
| "grad_norm": 1.9702144861221313, |
| "grad_norm_var": 0.015018191715113894, |
| "learning_rate": 0.1, |
| "loss": 2.8693, |
| "loss/crossentropy": 2.628023147583008, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24132423102855682, |
| "loss/reg": 0.15065088868141174, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.00603, |
| "grad_norm": 1.8371577262878418, |
| "grad_norm_var": 0.014814127622614552, |
| "learning_rate": 0.1, |
| "loss": 2.7595, |
| "loss/crossentropy": 2.5100905895233154, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2494572252035141, |
| "loss/reg": 0.15055446326732635, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.00604, |
| "grad_norm": 1.7778249979019165, |
| "grad_norm_var": 0.01419733582628074, |
| "learning_rate": 0.1, |
| "loss": 2.4033, |
| "loss/crossentropy": 2.1768853664398193, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22642287611961365, |
| "loss/reg": 0.1503826528787613, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.00605, |
| "grad_norm": 1.6742479801177979, |
| "grad_norm_var": 0.014268027427252564, |
| "learning_rate": 0.1, |
| "loss": 2.7379, |
| "loss/crossentropy": 2.5311801433563232, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20671282708644867, |
| "loss/reg": 0.1504843682050705, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.00606, |
| "grad_norm": 1.8549211025238037, |
| "grad_norm_var": 0.014451616525504344, |
| "learning_rate": 0.1, |
| "loss": 2.8057, |
| "loss/crossentropy": 2.5718417167663574, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23390711843967438, |
| "loss/reg": 0.15038102865219116, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.00607, |
| "grad_norm": 1.7844935655593872, |
| "grad_norm_var": 0.014337339049285861, |
| "learning_rate": 0.1, |
| "loss": 2.8842, |
| "loss/crossentropy": 2.6632678508758545, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.220954030752182, |
| "loss/reg": 0.1504642218351364, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.00608, |
| "grad_norm": 1.9240717887878418, |
| "grad_norm_var": 0.015299421314448065, |
| "learning_rate": 0.1, |
| "loss": 2.4703, |
| "loss/crossentropy": 2.2404303550720215, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22983460128307343, |
| "loss/reg": 0.15049317479133606, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.00609, |
| "grad_norm": 2.095503091812134, |
| "grad_norm_var": 0.018981439596768488, |
| "learning_rate": 0.1, |
| "loss": 2.7636, |
| "loss/crossentropy": 2.5267820358276367, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23685647547245026, |
| "loss/reg": 0.15046550333499908, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.0061, |
| "grad_norm": 1.8964033126831055, |
| "grad_norm_var": 0.018499793757769053, |
| "learning_rate": 0.1, |
| "loss": 2.8777, |
| "loss/crossentropy": 2.6731386184692383, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2045557200908661, |
| "loss/reg": 0.1505018025636673, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.00611, |
| "grad_norm": 1.7358745336532593, |
| "grad_norm_var": 0.01552938037081025, |
| "learning_rate": 0.1, |
| "loss": 2.8104, |
| "loss/crossentropy": 2.5920352935791016, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21833744645118713, |
| "loss/reg": 0.15036974847316742, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.00612, |
| "grad_norm": 1.8006587028503418, |
| "grad_norm_var": 0.015398547907339773, |
| "learning_rate": 0.1, |
| "loss": 2.6914, |
| "loss/crossentropy": 2.480757474899292, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2106877863407135, |
| "loss/reg": 0.1505700796842575, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.00613, |
| "grad_norm": 1.9342505931854248, |
| "grad_norm_var": 0.015897530950001956, |
| "learning_rate": 0.1, |
| "loss": 2.7465, |
| "loss/crossentropy": 2.5363821983337402, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2100977599620819, |
| "loss/reg": 0.15044917166233063, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.00614, |
| "grad_norm": 2.967681646347046, |
| "grad_norm_var": 0.09390690062434187, |
| "learning_rate": 0.1, |
| "loss": 2.3674, |
| "loss/crossentropy": 2.1577494144439697, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2096954882144928, |
| "loss/reg": 0.15044760704040527, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.00615, |
| "grad_norm": 1.7287908792495728, |
| "grad_norm_var": 0.09393373677991163, |
| "learning_rate": 0.1, |
| "loss": 2.6353, |
| "loss/crossentropy": 2.432443618774414, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20283305644989014, |
| "loss/reg": 0.1505429595708847, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.00616, |
| "grad_norm": 1.7362871170043945, |
| "grad_norm_var": 0.09569041601006324, |
| "learning_rate": 0.1, |
| "loss": 2.6257, |
| "loss/crossentropy": 2.413712978363037, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21197879314422607, |
| "loss/reg": 0.15043114125728607, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.00617, |
| "grad_norm": 1.7254228591918945, |
| "grad_norm_var": 0.09289016042040567, |
| "learning_rate": 0.1, |
| "loss": 2.7599, |
| "loss/crossentropy": 2.543215274810791, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21668514609336853, |
| "loss/reg": 0.15049269795417786, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.00618, |
| "grad_norm": 1.8112136125564575, |
| "grad_norm_var": 0.09303972612524654, |
| "learning_rate": 0.1, |
| "loss": 2.5692, |
| "loss/crossentropy": 2.33778715133667, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23140260577201843, |
| "loss/reg": 0.1505073457956314, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.00619, |
| "grad_norm": 1.6486409902572632, |
| "grad_norm_var": 0.09665949160087356, |
| "learning_rate": 0.1, |
| "loss": 2.4607, |
| "loss/crossentropy": 2.2597880363464355, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20092655718326569, |
| "loss/reg": 0.15055620670318604, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.0062, |
| "grad_norm": 1.803787350654602, |
| "grad_norm_var": 0.09634440185160903, |
| "learning_rate": 0.1, |
| "loss": 2.5959, |
| "loss/crossentropy": 2.3900070190429688, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2058688998222351, |
| "loss/reg": 0.15050970017910004, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.00621, |
| "grad_norm": 1.740278959274292, |
| "grad_norm_var": 0.09478219191824072, |
| "learning_rate": 0.1, |
| "loss": 2.8202, |
| "loss/crossentropy": 2.6115825176239014, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20861151814460754, |
| "loss/reg": 0.15042869746685028, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.00622, |
| "grad_norm": 1.815534234046936, |
| "grad_norm_var": 0.09504639377343829, |
| "learning_rate": 0.1, |
| "loss": 2.684, |
| "loss/crossentropy": 2.4644486904144287, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21952775120735168, |
| "loss/reg": 0.15048272907733917, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.00623, |
| "grad_norm": 1.872061014175415, |
| "grad_norm_var": 0.09436027338828315, |
| "learning_rate": 0.1, |
| "loss": 2.7925, |
| "loss/crossentropy": 2.598341464996338, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19414326548576355, |
| "loss/reg": 0.1505485475063324, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.00624, |
| "grad_norm": 1.7953898906707764, |
| "grad_norm_var": 0.09480682777860708, |
| "learning_rate": 0.1, |
| "loss": 2.656, |
| "loss/crossentropy": 2.445122241973877, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2108311951160431, |
| "loss/reg": 0.1503743678331375, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.00625, |
| "grad_norm": 1.5793187618255615, |
| "grad_norm_var": 0.09674730143599178, |
| "learning_rate": 0.1, |
| "loss": 2.6402, |
| "loss/crossentropy": 2.4466423988342285, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19357287883758545, |
| "loss/reg": 0.15043604373931885, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.00626, |
| "grad_norm": 1.71461021900177, |
| "grad_norm_var": 0.09767533951064984, |
| "learning_rate": 0.1, |
| "loss": 2.5503, |
| "loss/crossentropy": 2.3316049575805664, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21871240437030792, |
| "loss/reg": 0.15061518549919128, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.00627, |
| "grad_norm": 1.9560621976852417, |
| "grad_norm_var": 0.09770396143857306, |
| "learning_rate": 0.1, |
| "loss": 2.5754, |
| "loss/crossentropy": 2.3373005390167236, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23814457654953003, |
| "loss/reg": 0.15054671466350555, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.00628, |
| "grad_norm": 2.1807522773742676, |
| "grad_norm_var": 0.1041378434680178, |
| "learning_rate": 0.1, |
| "loss": 2.6235, |
| "loss/crossentropy": 2.3711602687835693, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.25234442949295044, |
| "loss/reg": 0.1504661738872528, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.00629, |
| "grad_norm": 2.158534526824951, |
| "grad_norm_var": 0.1090348147486641, |
| "learning_rate": 0.1, |
| "loss": 2.5695, |
| "loss/crossentropy": 2.3103621006011963, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2591152787208557, |
| "loss/reg": 0.15059620141983032, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.0063, |
| "grad_norm": 1.7792726755142212, |
| "grad_norm_var": 0.026485228716584666, |
| "learning_rate": 0.1, |
| "loss": 2.7439, |
| "loss/crossentropy": 2.5190913677215576, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22483915090560913, |
| "loss/reg": 0.15051616728305817, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.00631, |
| "grad_norm": 1.7624547481536865, |
| "grad_norm_var": 0.026167434926550884, |
| "learning_rate": 0.1, |
| "loss": 2.4557, |
| "loss/crossentropy": 2.23544979095459, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22029736638069153, |
| "loss/reg": 0.1505715399980545, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.00632, |
| "grad_norm": 1.7388150691986084, |
| "grad_norm_var": 0.02614046867231057, |
| "learning_rate": 0.1, |
| "loss": 2.7742, |
| "loss/crossentropy": 2.5713608264923096, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20287533104419708, |
| "loss/reg": 0.15065622329711914, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.00633, |
| "grad_norm": 1.928154706954956, |
| "grad_norm_var": 0.02621667087538378, |
| "learning_rate": 0.1, |
| "loss": 2.981, |
| "loss/crossentropy": 2.7476754188537598, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2333223521709442, |
| "loss/reg": 0.15054082870483398, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.00634, |
| "grad_norm": 1.7194525003433228, |
| "grad_norm_var": 0.026976507769019935, |
| "learning_rate": 0.1, |
| "loss": 2.7195, |
| "loss/crossentropy": 2.5144565105438232, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20507970452308655, |
| "loss/reg": 0.15050184726715088, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.00635, |
| "grad_norm": 1.9527243375778198, |
| "grad_norm_var": 0.025622730802553364, |
| "learning_rate": 0.1, |
| "loss": 2.4941, |
| "loss/crossentropy": 2.2538888454437256, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24022068083286285, |
| "loss/reg": 0.15044313669204712, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.00636, |
| "grad_norm": 1.895184874534607, |
| "grad_norm_var": 0.02565995668674829, |
| "learning_rate": 0.1, |
| "loss": 2.641, |
| "loss/crossentropy": 2.394188165664673, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24684296548366547, |
| "loss/reg": 0.1504664123058319, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.00637, |
| "grad_norm": 1.6878516674041748, |
| "grad_norm_var": 0.026593748939749595, |
| "learning_rate": 0.1, |
| "loss": 2.5196, |
| "loss/crossentropy": 2.2998995780944824, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21970722079277039, |
| "loss/reg": 0.15059831738471985, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.00638, |
| "grad_norm": 1.751684308052063, |
| "grad_norm_var": 0.02710800709899619, |
| "learning_rate": 0.1, |
| "loss": 2.6514, |
| "loss/crossentropy": 2.4400861263275146, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21130353212356567, |
| "loss/reg": 0.15053114295005798, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.00639, |
| "grad_norm": 1.5815562009811401, |
| "grad_norm_var": 0.03121897374923795, |
| "learning_rate": 0.1, |
| "loss": 2.5084, |
| "loss/crossentropy": 2.3065812587738037, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20177721977233887, |
| "loss/reg": 0.15040618181228638, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 1.7594926357269287, |
| "grad_norm_var": 0.03143579619746409, |
| "learning_rate": 0.1, |
| "loss": 2.7153, |
| "loss/crossentropy": 2.4815940856933594, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23374593257904053, |
| "loss/reg": 0.15048186480998993, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.00641, |
| "grad_norm": 1.8942620754241943, |
| "grad_norm_var": 0.027460301745478925, |
| "learning_rate": 0.1, |
| "loss": 3.011, |
| "loss/crossentropy": 2.7818515300750732, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22913798689842224, |
| "loss/reg": 0.15050029754638672, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.00642, |
| "grad_norm": 1.6803613901138306, |
| "grad_norm_var": 0.028112161912864052, |
| "learning_rate": 0.1, |
| "loss": 3.2002, |
| "loss/crossentropy": 2.9997916221618652, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2003743052482605, |
| "loss/reg": 0.1504283845424652, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.00643, |
| "grad_norm": 1.7545456886291504, |
| "grad_norm_var": 0.027509283036182275, |
| "learning_rate": 0.1, |
| "loss": 2.8288, |
| "loss/crossentropy": 2.599802017211914, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22902776300907135, |
| "loss/reg": 0.15053850412368774, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.00644, |
| "grad_norm": 2.0254580974578857, |
| "grad_norm_var": 0.02168286488064499, |
| "learning_rate": 0.1, |
| "loss": 2.6414, |
| "loss/crossentropy": 2.3858206272125244, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2555301785469055, |
| "loss/reg": 0.15049859881401062, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.00645, |
| "grad_norm": 1.704299807548523, |
| "grad_norm_var": 0.013885219847681422, |
| "learning_rate": 0.1, |
| "loss": 2.8755, |
| "loss/crossentropy": 2.654542922973633, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2209775298833847, |
| "loss/reg": 0.15047897398471832, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.00646, |
| "grad_norm": 1.7236101627349854, |
| "grad_norm_var": 0.014147147604164386, |
| "learning_rate": 0.1, |
| "loss": 2.5764, |
| "loss/crossentropy": 2.3818814754486084, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19451555609703064, |
| "loss/reg": 0.15046392381191254, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.00647, |
| "grad_norm": 1.8323906660079956, |
| "grad_norm_var": 0.014242660884257437, |
| "learning_rate": 0.1, |
| "loss": 2.7924, |
| "loss/crossentropy": 2.5812246799468994, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21113528311252594, |
| "loss/reg": 0.1504535675048828, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.00648, |
| "grad_norm": 1.754801869392395, |
| "grad_norm_var": 0.014150883051699203, |
| "learning_rate": 0.1, |
| "loss": 2.6533, |
| "loss/crossentropy": 2.408700942993164, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24459874629974365, |
| "loss/reg": 0.15061791241168976, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.00649, |
| "grad_norm": 1.786933183670044, |
| "grad_norm_var": 0.012802826054046968, |
| "learning_rate": 0.1, |
| "loss": 2.8283, |
| "loss/crossentropy": 2.6018128395080566, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2265368103981018, |
| "loss/reg": 0.15057246387004852, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.0065, |
| "grad_norm": 1.6831402778625488, |
| "grad_norm_var": 0.013185832588231724, |
| "learning_rate": 0.1, |
| "loss": 2.7365, |
| "loss/crossentropy": 2.50541090965271, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23104819655418396, |
| "loss/reg": 0.1505645513534546, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.00651, |
| "grad_norm": 2.0642776489257812, |
| "grad_norm_var": 0.01654353333608416, |
| "learning_rate": 0.1, |
| "loss": 2.5444, |
| "loss/crossentropy": 2.3005166053771973, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24387487769126892, |
| "loss/reg": 0.15063251554965973, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.00652, |
| "grad_norm": 1.6014286279678345, |
| "grad_norm_var": 0.017669756624436594, |
| "learning_rate": 0.1, |
| "loss": 2.7166, |
| "loss/crossentropy": 2.5170676708221436, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19950222969055176, |
| "loss/reg": 0.15054214000701904, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.00653, |
| "grad_norm": 1.7809066772460938, |
| "grad_norm_var": 0.0172180094041531, |
| "learning_rate": 0.1, |
| "loss": 2.7006, |
| "loss/crossentropy": 2.4897027015686035, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21088573336601257, |
| "loss/reg": 0.15043137967586517, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.00654, |
| "grad_norm": 1.6777292490005493, |
| "grad_norm_var": 0.017776902164607537, |
| "learning_rate": 0.1, |
| "loss": 2.6158, |
| "loss/crossentropy": 2.402358055114746, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21344512701034546, |
| "loss/reg": 0.15037015080451965, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.00655, |
| "grad_norm": 1.9645006656646729, |
| "grad_norm_var": 0.01736775294466663, |
| "learning_rate": 0.1, |
| "loss": 2.4865, |
| "loss/crossentropy": 2.2632296085357666, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22331009805202484, |
| "loss/reg": 0.15043193101882935, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.00656, |
| "grad_norm": 1.6882307529449463, |
| "grad_norm_var": 0.01800359937019659, |
| "learning_rate": 0.1, |
| "loss": 2.5947, |
| "loss/crossentropy": 2.370737075805664, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2239191234111786, |
| "loss/reg": 0.1505919247865677, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.00657, |
| "grad_norm": 1.7314926385879517, |
| "grad_norm_var": 0.017365345697882558, |
| "learning_rate": 0.1, |
| "loss": 2.5544, |
| "loss/crossentropy": 2.327021598815918, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22742240130901337, |
| "loss/reg": 0.15052783489227295, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.00658, |
| "grad_norm": 1.7544704675674438, |
| "grad_norm_var": 0.016740045983794912, |
| "learning_rate": 0.1, |
| "loss": 2.9132, |
| "loss/crossentropy": 2.6941778659820557, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21899715065956116, |
| "loss/reg": 0.15063896775245667, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.00659, |
| "grad_norm": 1.6882257461547852, |
| "grad_norm_var": 0.017266673321186882, |
| "learning_rate": 0.1, |
| "loss": 2.6275, |
| "loss/crossentropy": 2.435107469558716, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19235289096832275, |
| "loss/reg": 0.15053533017635345, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.0066, |
| "grad_norm": 1.6298198699951172, |
| "grad_norm_var": 0.01404173937022121, |
| "learning_rate": 0.1, |
| "loss": 2.6481, |
| "loss/crossentropy": 2.446171522140503, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2019185721874237, |
| "loss/reg": 0.15053869783878326, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.00661, |
| "grad_norm": 2.2420670986175537, |
| "grad_norm_var": 0.028542604315047272, |
| "learning_rate": 0.1, |
| "loss": 2.514, |
| "loss/crossentropy": 2.2851922512054443, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2288368046283722, |
| "loss/reg": 0.1505124419927597, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.00662, |
| "grad_norm": 4.139839172363281, |
| "grad_norm_var": 0.3727637149283026, |
| "learning_rate": 0.1, |
| "loss": 2.6724, |
| "loss/crossentropy": 2.416794538497925, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.25560256838798523, |
| "loss/reg": 0.15067332983016968, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.00663, |
| "grad_norm": 2.059591054916382, |
| "grad_norm_var": 0.37276749963422107, |
| "learning_rate": 0.1, |
| "loss": 2.8028, |
| "loss/crossentropy": 2.537842035293579, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.26493510603904724, |
| "loss/reg": 0.15061353147029877, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.00664, |
| "grad_norm": 1.897710919380188, |
| "grad_norm_var": 0.3702680117567055, |
| "learning_rate": 0.1, |
| "loss": 2.5087, |
| "loss/crossentropy": 2.289344072341919, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2193707376718521, |
| "loss/reg": 0.15053333342075348, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.00665, |
| "grad_norm": 1.9124773740768433, |
| "grad_norm_var": 0.3683243243769103, |
| "learning_rate": 0.1, |
| "loss": 2.4851, |
| "loss/crossentropy": 2.273557186126709, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2115914225578308, |
| "loss/reg": 0.15059436857700348, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.00666, |
| "grad_norm": 1.8667151927947998, |
| "grad_norm_var": 0.3634154517042549, |
| "learning_rate": 0.1, |
| "loss": 2.4787, |
| "loss/crossentropy": 2.2673990726470947, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2113196849822998, |
| "loss/reg": 0.1507449448108673, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.00667, |
| "grad_norm": 1.8310761451721191, |
| "grad_norm_var": 0.36423175812154585, |
| "learning_rate": 0.1, |
| "loss": 2.5986, |
| "loss/crossentropy": 2.3619420528411865, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23666906356811523, |
| "loss/reg": 0.15053722262382507, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.00668, |
| "grad_norm": 1.8773390054702759, |
| "grad_norm_var": 0.35555415654112016, |
| "learning_rate": 0.1, |
| "loss": 2.7032, |
| "loss/crossentropy": 2.4824411869049072, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22079603374004364, |
| "loss/reg": 0.15051327645778656, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.00669, |
| "grad_norm": 1.8660446405410767, |
| "grad_norm_var": 0.353703008567065, |
| "learning_rate": 0.1, |
| "loss": 2.8352, |
| "loss/crossentropy": 2.5858943462371826, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2492859810590744, |
| "loss/reg": 0.15065379440784454, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.0067, |
| "grad_norm": 3.5797994136810303, |
| "grad_norm_var": 0.5008259837056632, |
| "learning_rate": 0.1, |
| "loss": 2.9317, |
| "loss/crossentropy": 2.6138155460357666, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.31786972284317017, |
| "loss/reg": 0.15055862069129944, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.00671, |
| "grad_norm": 1.70569908618927, |
| "grad_norm_var": 0.509966858411497, |
| "learning_rate": 0.1, |
| "loss": 2.6938, |
| "loss/crossentropy": 2.471672534942627, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22217261791229248, |
| "loss/reg": 0.1505526900291443, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.00672, |
| "grad_norm": 1.9802297353744507, |
| "grad_norm_var": 0.4995792034805987, |
| "learning_rate": 0.1, |
| "loss": 2.6895, |
| "loss/crossentropy": 2.4819087982177734, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20755207538604736, |
| "loss/reg": 0.15057773888111115, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.00673, |
| "grad_norm": 2.019355535507202, |
| "grad_norm_var": 0.49022427435800414, |
| "learning_rate": 0.1, |
| "loss": 2.3521, |
| "loss/crossentropy": 2.1139144897460938, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2382015883922577, |
| "loss/reg": 0.15051567554473877, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.00674, |
| "grad_norm": 1.7466254234313965, |
| "grad_norm_var": 0.4906189958452548, |
| "learning_rate": 0.1, |
| "loss": 2.6816, |
| "loss/crossentropy": 2.449763059616089, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23188048601150513, |
| "loss/reg": 0.1506304293870926, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.00675, |
| "grad_norm": 1.7460286617279053, |
| "grad_norm_var": 0.4874410485752129, |
| "learning_rate": 0.1, |
| "loss": 2.6534, |
| "loss/crossentropy": 2.4320483207702637, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22134453058242798, |
| "loss/reg": 0.15045788884162903, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.00676, |
| "grad_norm": 2.7436137199401855, |
| "grad_norm_var": 0.490505406068187, |
| "learning_rate": 0.1, |
| "loss": 3.1576, |
| "loss/crossentropy": 2.9072084426879883, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2504385709762573, |
| "loss/reg": 0.15065650641918182, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.00677, |
| "grad_norm": 2.1897919178009033, |
| "grad_norm_var": 0.49038918198669207, |
| "learning_rate": 0.1, |
| "loss": 2.8491, |
| "loss/crossentropy": 2.6089799404144287, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24013099074363708, |
| "loss/reg": 0.15056735277175903, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.00678, |
| "grad_norm": 1.8735142946243286, |
| "grad_norm_var": 0.22451049745490786, |
| "learning_rate": 0.1, |
| "loss": 2.7777, |
| "loss/crossentropy": 2.5459609031677246, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23177331686019897, |
| "loss/reg": 0.15059608221054077, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.00679, |
| "grad_norm": 1.857632040977478, |
| "grad_norm_var": 0.226962360409406, |
| "learning_rate": 0.1, |
| "loss": 2.6109, |
| "loss/crossentropy": 2.378469944000244, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.232460156083107, |
| "loss/reg": 0.1506367027759552, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.0068, |
| "grad_norm": 1.7168678045272827, |
| "grad_norm_var": 0.23251816495223518, |
| "learning_rate": 0.1, |
| "loss": 2.8011, |
| "loss/crossentropy": 2.5831122398376465, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2180117666721344, |
| "loss/reg": 0.15074624121189117, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.00681, |
| "grad_norm": 1.74824059009552, |
| "grad_norm_var": 0.23682246600775347, |
| "learning_rate": 0.1, |
| "loss": 2.6421, |
| "loss/crossentropy": 2.4197237491607666, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2223721146583557, |
| "loss/reg": 0.15060542523860931, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.00682, |
| "grad_norm": 1.7029216289520264, |
| "grad_norm_var": 0.24188584627357745, |
| "learning_rate": 0.1, |
| "loss": 2.6599, |
| "loss/crossentropy": 2.430511474609375, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22943615913391113, |
| "loss/reg": 0.150690495967865, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.00683, |
| "grad_norm": 1.867256999015808, |
| "grad_norm_var": 0.24109704187572156, |
| "learning_rate": 0.1, |
| "loss": 2.5106, |
| "loss/crossentropy": 2.292717695236206, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21792888641357422, |
| "loss/reg": 0.150562584400177, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.00684, |
| "grad_norm": 1.7435557842254639, |
| "grad_norm_var": 0.2446500015576324, |
| "learning_rate": 0.1, |
| "loss": 2.7473, |
| "loss/crossentropy": 2.5527687072753906, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19448453187942505, |
| "loss/reg": 0.15050280094146729, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.00685, |
| "grad_norm": 1.9648960828781128, |
| "grad_norm_var": 0.24342335630761697, |
| "learning_rate": 0.1, |
| "loss": 2.8446, |
| "loss/crossentropy": 2.6248083114624023, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2198096513748169, |
| "loss/reg": 0.150659441947937, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.00686, |
| "grad_norm": 1.7134580612182617, |
| "grad_norm_var": 0.07089251322989772, |
| "learning_rate": 0.1, |
| "loss": 2.6458, |
| "loss/crossentropy": 2.423866033554077, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22189542651176453, |
| "loss/reg": 0.15065249800682068, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.00687, |
| "grad_norm": 1.7188957929611206, |
| "grad_norm_var": 0.07057034569102877, |
| "learning_rate": 0.1, |
| "loss": 2.749, |
| "loss/crossentropy": 2.5217947959899902, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22716712951660156, |
| "loss/reg": 0.1506287008523941, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.00688, |
| "grad_norm": 1.5421242713928223, |
| "grad_norm_var": 0.07763479369512485, |
| "learning_rate": 0.1, |
| "loss": 2.5266, |
| "loss/crossentropy": 2.321230888366699, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20540964603424072, |
| "loss/reg": 0.1507095992565155, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.00689, |
| "grad_norm": 1.5375348329544067, |
| "grad_norm_var": 0.08244796292507293, |
| "learning_rate": 0.1, |
| "loss": 2.7298, |
| "loss/crossentropy": 2.549647808074951, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18018591403961182, |
| "loss/reg": 0.15057538449764252, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.0069, |
| "grad_norm": 1.8091762065887451, |
| "grad_norm_var": 0.0819278426034316, |
| "learning_rate": 0.1, |
| "loss": 2.8985, |
| "loss/crossentropy": 2.657491445541382, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24102391302585602, |
| "loss/reg": 0.15060631930828094, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.00691, |
| "grad_norm": 1.6616994142532349, |
| "grad_norm_var": 0.08345386428505268, |
| "learning_rate": 0.1, |
| "loss": 2.7162, |
| "loss/crossentropy": 2.5117480754852295, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.204423189163208, |
| "loss/reg": 0.15065263211727142, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.00692, |
| "grad_norm": 1.820202112197876, |
| "grad_norm_var": 0.02511692798290799, |
| "learning_rate": 0.1, |
| "loss": 2.9677, |
| "loss/crossentropy": 2.7340517044067383, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23362231254577637, |
| "loss/reg": 0.15043103694915771, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.00693, |
| "grad_norm": 2.9105007648468018, |
| "grad_norm_var": 0.09703297561678799, |
| "learning_rate": 0.1, |
| "loss": 2.9835, |
| "loss/crossentropy": 2.728137969970703, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.25533464550971985, |
| "loss/reg": 0.1506151705980301, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.00694, |
| "grad_norm": 1.910051703453064, |
| "grad_norm_var": 0.09735626549085855, |
| "learning_rate": 0.1, |
| "loss": 3.0494, |
| "loss/crossentropy": 2.829622268676758, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21980157494544983, |
| "loss/reg": 0.1506633758544922, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.00695, |
| "grad_norm": 2.1285786628723145, |
| "grad_norm_var": 0.1030669131269813, |
| "learning_rate": 0.1, |
| "loss": 2.6724, |
| "loss/crossentropy": 2.455028533935547, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21733978390693665, |
| "loss/reg": 0.15069471299648285, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.00696, |
| "grad_norm": 1.655630350112915, |
| "grad_norm_var": 0.10433522079836018, |
| "learning_rate": 0.1, |
| "loss": 2.753, |
| "loss/crossentropy": 2.5490458011627197, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2039644718170166, |
| "loss/reg": 0.15064069628715515, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.00697, |
| "grad_norm": 1.6365665197372437, |
| "grad_norm_var": 0.10647604003532596, |
| "learning_rate": 0.1, |
| "loss": 2.8397, |
| "loss/crossentropy": 2.6311004161834717, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20860815048217773, |
| "loss/reg": 0.15066352486610413, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.00698, |
| "grad_norm": 1.7443971633911133, |
| "grad_norm_var": 0.10586592226811238, |
| "learning_rate": 0.1, |
| "loss": 2.786, |
| "loss/crossentropy": 2.553623914718628, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23234260082244873, |
| "loss/reg": 0.15058587491512299, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.00699, |
| "grad_norm": 2.0587990283966064, |
| "grad_norm_var": 0.1089755312684718, |
| "learning_rate": 0.1, |
| "loss": 2.9876, |
| "loss/crossentropy": 2.7421212196350098, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2454337775707245, |
| "loss/reg": 0.1506195068359375, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.007, |
| "grad_norm": 1.78428053855896, |
| "grad_norm_var": 0.10851610902069879, |
| "learning_rate": 0.1, |
| "loss": 2.8851, |
| "loss/crossentropy": 2.6433374881744385, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24180544912815094, |
| "loss/reg": 0.1507207304239273, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.00701, |
| "grad_norm": 1.764556646347046, |
| "grad_norm_var": 0.10795015003527902, |
| "learning_rate": 0.1, |
| "loss": 2.5707, |
| "loss/crossentropy": 2.3606631755828857, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2100578397512436, |
| "loss/reg": 0.1505555659532547, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.00702, |
| "grad_norm": 1.5967587232589722, |
| "grad_norm_var": 0.11072795227774085, |
| "learning_rate": 0.1, |
| "loss": 2.6641, |
| "loss/crossentropy": 2.445234775543213, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21888557076454163, |
| "loss/reg": 0.15052653849124908, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.00703, |
| "grad_norm": 1.912024974822998, |
| "grad_norm_var": 0.11019853500888856, |
| "learning_rate": 0.1, |
| "loss": 2.8306, |
| "loss/crossentropy": 2.6104273796081543, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22021466493606567, |
| "loss/reg": 0.1506466418504715, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.00704, |
| "grad_norm": 1.9255599975585938, |
| "grad_norm_var": 0.10405357608890325, |
| "learning_rate": 0.1, |
| "loss": 2.917, |
| "loss/crossentropy": 2.670037269592285, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24695564806461334, |
| "loss/reg": 0.1506807804107666, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.00705, |
| "grad_norm": 2.0575363636016846, |
| "grad_norm_var": 0.09817864718948097, |
| "learning_rate": 0.1, |
| "loss": 2.6939, |
| "loss/crossentropy": 2.4582855701446533, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2355845868587494, |
| "loss/reg": 0.15073120594024658, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.00706, |
| "grad_norm": 1.7972407341003418, |
| "grad_norm_var": 0.0983297319609998, |
| "learning_rate": 0.1, |
| "loss": 2.7488, |
| "loss/crossentropy": 2.535000801086426, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21378938853740692, |
| "loss/reg": 0.15066561102867126, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.00707, |
| "grad_norm": 1.8415987491607666, |
| "grad_norm_var": 0.09468984662132028, |
| "learning_rate": 0.1, |
| "loss": 2.7871, |
| "loss/crossentropy": 2.557908058166504, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22916267812252045, |
| "loss/reg": 0.1507493555545807, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.00708, |
| "grad_norm": 2.703770637512207, |
| "grad_norm_var": 0.1330198938128944, |
| "learning_rate": 0.1, |
| "loss": 3.3083, |
| "loss/crossentropy": 3.0656118392944336, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2426561862230301, |
| "loss/reg": 0.1506742686033249, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.00709, |
| "grad_norm": 2.5990288257598877, |
| "grad_norm_var": 0.09978552349005358, |
| "learning_rate": 0.1, |
| "loss": 3.2378, |
| "loss/crossentropy": 2.9643797874450684, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2734586298465729, |
| "loss/reg": 0.15072692930698395, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.0071, |
| "grad_norm": 2.03259539604187, |
| "grad_norm_var": 0.10015675462682694, |
| "learning_rate": 0.1, |
| "loss": 2.6593, |
| "loss/crossentropy": 2.450859546661377, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.208455890417099, |
| "loss/reg": 0.15072157979011536, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.00711, |
| "grad_norm": 1.8557087182998657, |
| "grad_norm_var": 0.09840172174536448, |
| "learning_rate": 0.1, |
| "loss": 2.7195, |
| "loss/crossentropy": 2.491011619567871, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22851739823818207, |
| "loss/reg": 0.15064258873462677, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.00712, |
| "grad_norm": 1.7069950103759766, |
| "grad_norm_var": 0.09665072923569268, |
| "learning_rate": 0.1, |
| "loss": 2.8309, |
| "loss/crossentropy": 2.623868465423584, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20702505111694336, |
| "loss/reg": 0.1506524235010147, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.00713, |
| "grad_norm": 1.8157384395599365, |
| "grad_norm_var": 0.09144195619162569, |
| "learning_rate": 0.1, |
| "loss": 3.1134, |
| "loss/crossentropy": 2.907552480697632, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20585530996322632, |
| "loss/reg": 0.15065917372703552, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.00714, |
| "grad_norm": 2.0031638145446777, |
| "grad_norm_var": 0.08854056649353008, |
| "learning_rate": 0.1, |
| "loss": 2.7384, |
| "loss/crossentropy": 2.4970147609710693, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24138247966766357, |
| "loss/reg": 0.15071015059947968, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.00715, |
| "grad_norm": 1.846663236618042, |
| "grad_norm_var": 0.08872722934098362, |
| "learning_rate": 0.1, |
| "loss": 2.7213, |
| "loss/crossentropy": 2.5050697326660156, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21625757217407227, |
| "loss/reg": 0.15064217150211334, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.00716, |
| "grad_norm": 1.7616455554962158, |
| "grad_norm_var": 0.08926754422346335, |
| "learning_rate": 0.1, |
| "loss": 2.6102, |
| "loss/crossentropy": 2.3879828453063965, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22224101424217224, |
| "loss/reg": 0.15073584020137787, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.00717, |
| "grad_norm": 1.7401652336120605, |
| "grad_norm_var": 0.08991200906485777, |
| "learning_rate": 0.1, |
| "loss": 2.5976, |
| "loss/crossentropy": 2.374809741973877, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22283729910850525, |
| "loss/reg": 0.15067793428897858, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.00718, |
| "grad_norm": 1.7945215702056885, |
| "grad_norm_var": 0.08304826467097251, |
| "learning_rate": 0.1, |
| "loss": 2.7866, |
| "loss/crossentropy": 2.565732479095459, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22087079286575317, |
| "loss/reg": 0.15080523490905762, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.00719, |
| "grad_norm": 1.6868669986724854, |
| "grad_norm_var": 0.08772074764762383, |
| "learning_rate": 0.1, |
| "loss": 2.5593, |
| "loss/crossentropy": 2.3386070728302, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22067709267139435, |
| "loss/reg": 0.1505899578332901, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.0072, |
| "grad_norm": 1.7713934183120728, |
| "grad_norm_var": 0.0896684993413345, |
| "learning_rate": 0.1, |
| "loss": 2.714, |
| "loss/crossentropy": 2.4799399375915527, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2340325117111206, |
| "loss/reg": 0.15069106221199036, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.00721, |
| "grad_norm": 1.7177095413208008, |
| "grad_norm_var": 0.0914887025673842, |
| "learning_rate": 0.1, |
| "loss": 2.5053, |
| "loss/crossentropy": 2.2811334133148193, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22420024871826172, |
| "loss/reg": 0.1506507694721222, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.00722, |
| "grad_norm": 1.929278016090393, |
| "grad_norm_var": 0.09046687207279777, |
| "learning_rate": 0.1, |
| "loss": 2.7308, |
| "loss/crossentropy": 2.4492602348327637, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.28157472610473633, |
| "loss/reg": 0.15051567554473877, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.00723, |
| "grad_norm": 1.8084168434143066, |
| "grad_norm_var": 0.0909065675708949, |
| "learning_rate": 0.1, |
| "loss": 2.7425, |
| "loss/crossentropy": 2.5114240646362305, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23111049830913544, |
| "loss/reg": 0.15071170032024384, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.00724, |
| "grad_norm": 2.3674378395080566, |
| "grad_norm_var": 0.06297924947749506, |
| "learning_rate": 0.1, |
| "loss": 2.6202, |
| "loss/crossentropy": 2.3838629722595215, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2363368421792984, |
| "loss/reg": 0.15078045427799225, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.00725, |
| "grad_norm": 2.6291730403900146, |
| "grad_norm_var": 0.06583622126769857, |
| "learning_rate": 0.1, |
| "loss": 3.4422, |
| "loss/crossentropy": 3.19541597366333, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24682898819446564, |
| "loss/reg": 0.15069250762462616, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.00726, |
| "grad_norm": 1.7215780019760132, |
| "grad_norm_var": 0.06655823983164146, |
| "learning_rate": 0.1, |
| "loss": 2.9263, |
| "loss/crossentropy": 2.691481590270996, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23485758900642395, |
| "loss/reg": 0.15075919032096863, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.00727, |
| "grad_norm": 2.1336874961853027, |
| "grad_norm_var": 0.07031031641084264, |
| "learning_rate": 0.1, |
| "loss": 2.6517, |
| "loss/crossentropy": 2.4191596508026123, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23250731825828552, |
| "loss/reg": 0.15062490105628967, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.00728, |
| "grad_norm": 1.7764811515808105, |
| "grad_norm_var": 0.0688039913071212, |
| "learning_rate": 0.1, |
| "loss": 2.7631, |
| "loss/crossentropy": 2.5292651653289795, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23384268581867218, |
| "loss/reg": 0.1506718546152115, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.00729, |
| "grad_norm": 1.680443286895752, |
| "grad_norm_var": 0.07158522986901653, |
| "learning_rate": 0.1, |
| "loss": 2.8144, |
| "loss/crossentropy": 2.5982909202575684, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2161257266998291, |
| "loss/reg": 0.15059664845466614, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.0073, |
| "grad_norm": 1.684423804283142, |
| "grad_norm_var": 0.07346726768252339, |
| "learning_rate": 0.1, |
| "loss": 2.4953, |
| "loss/crossentropy": 2.2854747772216797, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20979759097099304, |
| "loss/reg": 0.15069302916526794, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.00731, |
| "grad_norm": 1.695099115371704, |
| "grad_norm_var": 0.07553864925794543, |
| "learning_rate": 0.1, |
| "loss": 2.6448, |
| "loss/crossentropy": 2.431910276412964, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21286997199058533, |
| "loss/reg": 0.15078265964984894, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.00732, |
| "grad_norm": 1.7980223894119263, |
| "grad_norm_var": 0.07510238013936946, |
| "learning_rate": 0.1, |
| "loss": 2.5763, |
| "loss/crossentropy": 2.355053186416626, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22128906846046448, |
| "loss/reg": 0.15057377517223358, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.00733, |
| "grad_norm": 1.9372049570083618, |
| "grad_norm_var": 0.07409377306537764, |
| "learning_rate": 0.1, |
| "loss": 2.8292, |
| "loss/crossentropy": 2.594406843185425, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23475268483161926, |
| "loss/reg": 0.15064290165901184, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.00734, |
| "grad_norm": 1.6604641675949097, |
| "grad_norm_var": 0.07680265185979487, |
| "learning_rate": 0.1, |
| "loss": 2.8441, |
| "loss/crossentropy": 2.64202618598938, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20209267735481262, |
| "loss/reg": 0.15082409977912903, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.00735, |
| "grad_norm": 1.7398064136505127, |
| "grad_norm_var": 0.07565088320833831, |
| "learning_rate": 0.1, |
| "loss": 2.509, |
| "loss/crossentropy": 2.288006544113159, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22095456719398499, |
| "loss/reg": 0.150763601064682, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.00736, |
| "grad_norm": 1.6285425424575806, |
| "grad_norm_var": 0.07895991227490946, |
| "learning_rate": 0.1, |
| "loss": 2.6347, |
| "loss/crossentropy": 2.415604829788208, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21912458539009094, |
| "loss/reg": 0.15072403848171234, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.00737, |
| "grad_norm": 1.597941279411316, |
| "grad_norm_var": 0.08227617359328458, |
| "learning_rate": 0.1, |
| "loss": 2.7248, |
| "loss/crossentropy": 2.5091822147369385, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21559391915798187, |
| "loss/reg": 0.15077482163906097, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.00738, |
| "grad_norm": 1.7690709829330444, |
| "grad_norm_var": 0.08243785564787534, |
| "learning_rate": 0.1, |
| "loss": 2.6722, |
| "loss/crossentropy": 2.4468064308166504, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22537848353385925, |
| "loss/reg": 0.15076465904712677, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.00739, |
| "grad_norm": 1.6228259801864624, |
| "grad_norm_var": 0.08566258241000539, |
| "learning_rate": 0.1, |
| "loss": 2.4894, |
| "loss/crossentropy": 2.2784922122955322, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21092478930950165, |
| "loss/reg": 0.1509304940700531, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.0074, |
| "grad_norm": 1.7106890678405762, |
| "grad_norm_var": 0.06644618342732442, |
| "learning_rate": 0.1, |
| "loss": 2.736, |
| "loss/crossentropy": 2.500734806060791, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2352195829153061, |
| "loss/reg": 0.15075266361236572, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.00741, |
| "grad_norm": 1.604475498199463, |
| "grad_norm_var": 0.018660409189223928, |
| "learning_rate": 0.1, |
| "loss": 2.5904, |
| "loss/crossentropy": 2.382263422012329, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20810920000076294, |
| "loss/reg": 0.15070372819900513, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.00742, |
| "grad_norm": 1.908246636390686, |
| "grad_norm_var": 0.020502994520018032, |
| "learning_rate": 0.1, |
| "loss": 2.7611, |
| "loss/crossentropy": 2.5475547313690186, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21356894075870514, |
| "loss/reg": 0.15075084567070007, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.00743, |
| "grad_norm": 1.693289875984192, |
| "grad_norm_var": 0.009901915429343452, |
| "learning_rate": 0.1, |
| "loss": 2.6589, |
| "loss/crossentropy": 2.4369637966156006, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22193092107772827, |
| "loss/reg": 0.15087561309337616, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.00744, |
| "grad_norm": 2.214367151260376, |
| "grad_norm_var": 0.025230904041631894, |
| "learning_rate": 0.1, |
| "loss": 2.5237, |
| "loss/crossentropy": 2.2560694217681885, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.26764050126075745, |
| "loss/reg": 0.15073107182979584, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.00745, |
| "grad_norm": 1.719102144241333, |
| "grad_norm_var": 0.024983526294251608, |
| "learning_rate": 0.1, |
| "loss": 2.6286, |
| "loss/crossentropy": 2.4266693592071533, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20188188552856445, |
| "loss/reg": 0.15068763494491577, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.00746, |
| "grad_norm": 1.711766242980957, |
| "grad_norm_var": 0.024794926617455755, |
| "learning_rate": 0.1, |
| "loss": 2.6786, |
| "loss/crossentropy": 2.469654083251953, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20896171033382416, |
| "loss/reg": 0.15083599090576172, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.00747, |
| "grad_norm": 2.076267957687378, |
| "grad_norm_var": 0.03105066241022308, |
| "learning_rate": 0.1, |
| "loss": 3.1829, |
| "loss/crossentropy": 2.9400455951690674, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24282129108905792, |
| "loss/reg": 0.1507270187139511, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.00748, |
| "grad_norm": 2.007382392883301, |
| "grad_norm_var": 0.034446612463121556, |
| "learning_rate": 0.1, |
| "loss": 2.6659, |
| "loss/crossentropy": 2.426269054412842, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23963968455791473, |
| "loss/reg": 0.1508168876171112, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.00749, |
| "grad_norm": 1.7096943855285645, |
| "grad_norm_var": 0.03314315371103849, |
| "learning_rate": 0.1, |
| "loss": 2.9207, |
| "loss/crossentropy": 2.7087104320526123, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21203161776065826, |
| "loss/reg": 0.1506882607936859, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.0075, |
| "grad_norm": 2.013122797012329, |
| "grad_norm_var": 0.03560716099000061, |
| "learning_rate": 0.1, |
| "loss": 2.7032, |
| "loss/crossentropy": 2.46675968170166, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23640409111976624, |
| "loss/reg": 0.15079770982265472, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.00751, |
| "grad_norm": 1.6599361896514893, |
| "grad_norm_var": 0.0365980279340512, |
| "learning_rate": 0.1, |
| "loss": 2.5192, |
| "loss/crossentropy": 2.3084733486175537, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21076543629169464, |
| "loss/reg": 0.15083208680152893, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.00752, |
| "grad_norm": 1.5895143747329712, |
| "grad_norm_var": 0.03753559890891225, |
| "learning_rate": 0.1, |
| "loss": 2.5766, |
| "loss/crossentropy": 2.363900899887085, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2126864790916443, |
| "loss/reg": 0.15089723467826843, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.00753, |
| "grad_norm": 1.930123209953308, |
| "grad_norm_var": 0.03601512468355637, |
| "learning_rate": 0.1, |
| "loss": 2.6042, |
| "loss/crossentropy": 2.394103527069092, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21011880040168762, |
| "loss/reg": 0.1507357805967331, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.00754, |
| "grad_norm": 1.7516082525253296, |
| "grad_norm_var": 0.03612655285846789, |
| "learning_rate": 0.1, |
| "loss": 2.8153, |
| "loss/crossentropy": 2.6021697521209717, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21316707134246826, |
| "loss/reg": 0.15090234577655792, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.00755, |
| "grad_norm": 1.8780457973480225, |
| "grad_norm_var": 0.03390816552193072, |
| "learning_rate": 0.1, |
| "loss": 2.8009, |
| "loss/crossentropy": 2.5766563415527344, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22426337003707886, |
| "loss/reg": 0.15080603957176208, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.00756, |
| "grad_norm": 1.7158637046813965, |
| "grad_norm_var": 0.03383193462138691, |
| "learning_rate": 0.1, |
| "loss": 2.6381, |
| "loss/crossentropy": 2.426301956176758, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21179910004138947, |
| "loss/reg": 0.15098083019256592, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.00757, |
| "grad_norm": 1.8814210891723633, |
| "grad_norm_var": 0.030522188540995348, |
| "learning_rate": 0.1, |
| "loss": 2.6195, |
| "loss/crossentropy": 2.3845207691192627, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2349337488412857, |
| "loss/reg": 0.15090647339820862, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.00758, |
| "grad_norm": 1.7149051427841187, |
| "grad_norm_var": 0.031131000316445483, |
| "learning_rate": 0.1, |
| "loss": 2.8893, |
| "loss/crossentropy": 2.6815686225891113, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2076815366744995, |
| "loss/reg": 0.15091994404792786, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.00759, |
| "grad_norm": 2.171678066253662, |
| "grad_norm_var": 0.036768560899038745, |
| "learning_rate": 0.1, |
| "loss": 3.5486, |
| "loss/crossentropy": 3.3113834857940674, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23717352747917175, |
| "loss/reg": 0.15077845752239227, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.0076, |
| "grad_norm": 1.7233777046203613, |
| "grad_norm_var": 0.028574541656923694, |
| "learning_rate": 0.1, |
| "loss": 2.6536, |
| "loss/crossentropy": 2.4277853965759277, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22583560645580292, |
| "loss/reg": 0.15087072551250458, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.00761, |
| "grad_norm": 1.73390531539917, |
| "grad_norm_var": 0.028372583058582517, |
| "learning_rate": 0.1, |
| "loss": 2.6262, |
| "loss/crossentropy": 2.418344736099243, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20788376033306122, |
| "loss/reg": 0.15078125894069672, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.00762, |
| "grad_norm": 1.8074291944503784, |
| "grad_norm_var": 0.027445544805627678, |
| "learning_rate": 0.1, |
| "loss": 2.9124, |
| "loss/crossentropy": 2.700201988220215, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21222880482673645, |
| "loss/reg": 0.15085668861865997, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.00763, |
| "grad_norm": 1.7451679706573486, |
| "grad_norm_var": 0.02365786622837991, |
| "learning_rate": 0.1, |
| "loss": 2.5782, |
| "loss/crossentropy": 2.337271213531494, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24097557365894318, |
| "loss/reg": 0.15081782639026642, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.00764, |
| "grad_norm": 1.8473081588745117, |
| "grad_norm_var": 0.021144185924473823, |
| "learning_rate": 0.1, |
| "loss": 2.7639, |
| "loss/crossentropy": 2.546060562133789, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21782688796520233, |
| "loss/reg": 0.15088941156864166, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.00765, |
| "grad_norm": 1.7671552896499634, |
| "grad_norm_var": 0.02062366942713325, |
| "learning_rate": 0.1, |
| "loss": 2.8312, |
| "loss/crossentropy": 2.625101089477539, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2060982584953308, |
| "loss/reg": 0.15080241858959198, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.00766, |
| "grad_norm": 2.232208013534546, |
| "grad_norm_var": 0.02961080356292669, |
| "learning_rate": 0.1, |
| "loss": 2.6053, |
| "loss/crossentropy": 2.3728339672088623, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.232514888048172, |
| "loss/reg": 0.15084950625896454, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.00767, |
| "grad_norm": 1.6949241161346436, |
| "grad_norm_var": 0.028931962276099198, |
| "learning_rate": 0.1, |
| "loss": 2.7234, |
| "loss/crossentropy": 2.511833429336548, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21160735189914703, |
| "loss/reg": 0.15080159902572632, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.00768, |
| "grad_norm": 2.0308444499969482, |
| "grad_norm_var": 0.02730481565247036, |
| "learning_rate": 0.1, |
| "loss": 2.7227, |
| "loss/crossentropy": 2.5082123279571533, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2145037055015564, |
| "loss/reg": 0.1508137732744217, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.00769, |
| "grad_norm": 1.7126775979995728, |
| "grad_norm_var": 0.027984036388228085, |
| "learning_rate": 0.1, |
| "loss": 2.5284, |
| "loss/crossentropy": 2.292144775390625, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2362442910671234, |
| "loss/reg": 0.15095730125904083, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.0077, |
| "grad_norm": 1.6673574447631836, |
| "grad_norm_var": 0.029398515393834403, |
| "learning_rate": 0.1, |
| "loss": 2.8241, |
| "loss/crossentropy": 2.5989766120910645, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2251165807247162, |
| "loss/reg": 0.15082690119743347, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.00771, |
| "grad_norm": 1.8127816915512085, |
| "grad_norm_var": 0.02927071581589041, |
| "learning_rate": 0.1, |
| "loss": 2.5673, |
| "loss/crossentropy": 2.340287208557129, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2269660383462906, |
| "loss/reg": 0.15091097354888916, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.00772, |
| "grad_norm": 1.8197225332260132, |
| "grad_norm_var": 0.02838251125276064, |
| "learning_rate": 0.1, |
| "loss": 2.8102, |
| "loss/crossentropy": 2.581475019454956, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22869691252708435, |
| "loss/reg": 0.15092697739601135, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.00773, |
| "grad_norm": 2.1682803630828857, |
| "grad_norm_var": 0.03529418992005541, |
| "learning_rate": 0.1, |
| "loss": 3.2411, |
| "loss/crossentropy": 3.0017166137695312, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23941382765769958, |
| "loss/reg": 0.150810107588768, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.00774, |
| "grad_norm": 1.7576192617416382, |
| "grad_norm_var": 0.03462112757389567, |
| "learning_rate": 0.1, |
| "loss": 2.9663, |
| "loss/crossentropy": 2.7609782218933105, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20530001819133759, |
| "loss/reg": 0.1509704887866974, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.00775, |
| "grad_norm": 1.7283459901809692, |
| "grad_norm_var": 0.028231894714512804, |
| "learning_rate": 0.1, |
| "loss": 2.7677, |
| "loss/crossentropy": 2.555812358856201, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21189913153648376, |
| "loss/reg": 0.15091703832149506, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.00776, |
| "grad_norm": 1.8458093404769897, |
| "grad_norm_var": 0.027459734147164263, |
| "learning_rate": 0.1, |
| "loss": 2.7102, |
| "loss/crossentropy": 2.4707260131835938, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.239429771900177, |
| "loss/reg": 0.1509750336408615, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.00777, |
| "grad_norm": 1.7405065298080444, |
| "grad_norm_var": 0.0273728433203269, |
| "learning_rate": 0.1, |
| "loss": 2.5291, |
| "loss/crossentropy": 2.312211036682129, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21687430143356323, |
| "loss/reg": 0.15091697871685028, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.00778, |
| "grad_norm": 1.849686861038208, |
| "grad_norm_var": 0.0273227192709534, |
| "learning_rate": 0.1, |
| "loss": 2.7241, |
| "loss/crossentropy": 2.4956886768341064, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22838279604911804, |
| "loss/reg": 0.15093456208705902, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.00779, |
| "grad_norm": 2.039895534515381, |
| "grad_norm_var": 0.029073274873417175, |
| "learning_rate": 0.1, |
| "loss": 2.7465, |
| "loss/crossentropy": 2.523282051086426, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22320334613323212, |
| "loss/reg": 0.15108683705329895, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.0078, |
| "grad_norm": 2.224729537963867, |
| "grad_norm_var": 0.037478661941308174, |
| "learning_rate": 0.1, |
| "loss": 2.5789, |
| "loss/crossentropy": 2.323227882385254, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2556631565093994, |
| "loss/reg": 0.15091000497341156, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.00781, |
| "grad_norm": 1.7471396923065186, |
| "grad_norm_var": 0.03780694724429067, |
| "learning_rate": 0.1, |
| "loss": 2.6242, |
| "loss/crossentropy": 2.4163246154785156, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20782677829265594, |
| "loss/reg": 0.1508997082710266, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.00782, |
| "grad_norm": 1.7806816101074219, |
| "grad_norm_var": 0.02931692927019037, |
| "learning_rate": 0.1, |
| "loss": 2.6624, |
| "loss/crossentropy": 2.43576979637146, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22660242021083832, |
| "loss/reg": 0.1508878618478775, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.00783, |
| "grad_norm": 1.8929888010025024, |
| "grad_norm_var": 0.02763877413625527, |
| "learning_rate": 0.1, |
| "loss": 3.0431, |
| "loss/crossentropy": 2.827435255050659, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2156609445810318, |
| "loss/reg": 0.1509266346693039, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.00784, |
| "grad_norm": 1.7493419647216797, |
| "grad_norm_var": 0.026317647032907607, |
| "learning_rate": 0.1, |
| "loss": 2.6581, |
| "loss/crossentropy": 2.459343433380127, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19876118004322052, |
| "loss/reg": 0.15106679499149323, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.00785, |
| "grad_norm": 1.734584927558899, |
| "grad_norm_var": 0.025957925356230285, |
| "learning_rate": 0.1, |
| "loss": 2.7838, |
| "loss/crossentropy": 2.5733463764190674, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21045953035354614, |
| "loss/reg": 0.15091472864151, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.00786, |
| "grad_norm": 1.6579521894454956, |
| "grad_norm_var": 0.026189317553162953, |
| "learning_rate": 0.1, |
| "loss": 2.7772, |
| "loss/crossentropy": 2.5537827014923096, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2234613001346588, |
| "loss/reg": 0.15087002515792847, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.00787, |
| "grad_norm": 1.7487107515335083, |
| "grad_norm_var": 0.02673717311208031, |
| "learning_rate": 0.1, |
| "loss": 2.9595, |
| "loss/crossentropy": 2.7369134426116943, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22263549268245697, |
| "loss/reg": 0.15100540220737457, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.00788, |
| "grad_norm": 2.0498788356781006, |
| "grad_norm_var": 0.029337434733762772, |
| "learning_rate": 0.1, |
| "loss": 2.7498, |
| "loss/crossentropy": 2.488692045211792, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.26107364892959595, |
| "loss/reg": 0.15107010304927826, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.00789, |
| "grad_norm": 1.6256144046783447, |
| "grad_norm_var": 0.025238774032443227, |
| "learning_rate": 0.1, |
| "loss": 2.7479, |
| "loss/crossentropy": 2.527489423751831, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2204163372516632, |
| "loss/reg": 0.15097643435001373, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.0079, |
| "grad_norm": 1.6198973655700684, |
| "grad_norm_var": 0.027631109261218433, |
| "learning_rate": 0.1, |
| "loss": 2.5385, |
| "loss/crossentropy": 2.33186674118042, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2066439688205719, |
| "loss/reg": 0.1509358286857605, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.00791, |
| "grad_norm": 1.6421089172363281, |
| "grad_norm_var": 0.029089239136735046, |
| "learning_rate": 0.1, |
| "loss": 2.7904, |
| "loss/crossentropy": 2.5691418647766113, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22126135230064392, |
| "loss/reg": 0.15098492801189423, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.00792, |
| "grad_norm": 1.9431328773498535, |
| "grad_norm_var": 0.030154403650542307, |
| "learning_rate": 0.1, |
| "loss": 2.8731, |
| "loss/crossentropy": 2.646146774291992, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22694949805736542, |
| "loss/reg": 0.1509823203086853, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.00793, |
| "grad_norm": 1.6945117712020874, |
| "grad_norm_var": 0.030746090579102693, |
| "learning_rate": 0.1, |
| "loss": 2.3751, |
| "loss/crossentropy": 2.157468318939209, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2176416665315628, |
| "loss/reg": 0.1508500725030899, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.00794, |
| "grad_norm": 1.6674375534057617, |
| "grad_norm_var": 0.031919679030795046, |
| "learning_rate": 0.1, |
| "loss": 2.6759, |
| "loss/crossentropy": 2.4650402069091797, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21090053021907806, |
| "loss/reg": 0.15099157392978668, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.00795, |
| "grad_norm": 1.5701175928115845, |
| "grad_norm_var": 0.03075937740836802, |
| "learning_rate": 0.1, |
| "loss": 2.6447, |
| "loss/crossentropy": 2.4458796977996826, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1988617479801178, |
| "loss/reg": 0.15099160373210907, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.00796, |
| "grad_norm": 1.701102375984192, |
| "grad_norm_var": 0.016273929705383654, |
| "learning_rate": 0.1, |
| "loss": 2.66, |
| "loss/crossentropy": 2.4282948970794678, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23169100284576416, |
| "loss/reg": 0.15110540390014648, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.00797, |
| "grad_norm": 2.9079794883728027, |
| "grad_norm_var": 0.10174397054277187, |
| "learning_rate": 0.1, |
| "loss": 2.885, |
| "loss/crossentropy": 2.625089645385742, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2599393129348755, |
| "loss/reg": 0.150977224111557, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.00798, |
| "grad_norm": 1.7524960041046143, |
| "grad_norm_var": 0.10190991980663042, |
| "learning_rate": 0.1, |
| "loss": 2.872, |
| "loss/crossentropy": 2.650197744369507, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22179746627807617, |
| "loss/reg": 0.15100160241127014, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.00799, |
| "grad_norm": 1.831506371498108, |
| "grad_norm_var": 0.1014647630521404, |
| "learning_rate": 0.1, |
| "loss": 2.6986, |
| "loss/crossentropy": 2.4625658988952637, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23605096340179443, |
| "loss/reg": 0.15105685591697693, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 2.1624722480773926, |
| "grad_norm_var": 0.10900981364211564, |
| "learning_rate": 0.1, |
| "loss": 2.4573, |
| "loss/crossentropy": 2.2211740016937256, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23611702024936676, |
| "loss/reg": 0.1510859727859497, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.00801, |
| "grad_norm": 1.777612566947937, |
| "grad_norm_var": 0.10856754776211322, |
| "learning_rate": 0.1, |
| "loss": 3.4788, |
| "loss/crossentropy": 3.2874317169189453, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19141694903373718, |
| "loss/reg": 0.15098515152931213, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.00802, |
| "grad_norm": 1.767196536064148, |
| "grad_norm_var": 0.1067413794962457, |
| "learning_rate": 0.1, |
| "loss": 2.9241, |
| "loss/crossentropy": 2.702512264251709, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22157491743564606, |
| "loss/reg": 0.15091672539710999, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.00803, |
| "grad_norm": 2.276857614517212, |
| "grad_norm_var": 0.11765068355106792, |
| "learning_rate": 0.1, |
| "loss": 3.3777, |
| "loss/crossentropy": 3.150794267654419, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2268976867198944, |
| "loss/reg": 0.15117400884628296, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.00804, |
| "grad_norm": 1.7274490594863892, |
| "grad_norm_var": 0.11660301375311892, |
| "learning_rate": 0.1, |
| "loss": 2.7659, |
| "loss/crossentropy": 2.5552048683166504, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21072156727313995, |
| "loss/reg": 0.15110497176647186, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.00805, |
| "grad_norm": 1.773082971572876, |
| "grad_norm_var": 0.11346728144613198, |
| "learning_rate": 0.1, |
| "loss": 2.6936, |
| "loss/crossentropy": 2.488471031188965, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2051234394311905, |
| "loss/reg": 0.1510382890701294, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.00806, |
| "grad_norm": 2.4048869609832764, |
| "grad_norm_var": 0.12649037985666017, |
| "learning_rate": 0.1, |
| "loss": 2.9814, |
| "loss/crossentropy": 2.695352792739868, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.28604379296302795, |
| "loss/reg": 0.1510734111070633, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.00807, |
| "grad_norm": 1.7796632051467896, |
| "grad_norm_var": 0.12271388296467549, |
| "learning_rate": 0.1, |
| "loss": 2.6402, |
| "loss/crossentropy": 2.420163631439209, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22005164623260498, |
| "loss/reg": 0.15096436440944672, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.00808, |
| "grad_norm": 1.8072069883346558, |
| "grad_norm_var": 0.12346920424477657, |
| "learning_rate": 0.1, |
| "loss": 2.7435, |
| "loss/crossentropy": 2.5001795291900635, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24331586062908173, |
| "loss/reg": 0.150949627161026, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.00809, |
| "grad_norm": 1.9879480600357056, |
| "grad_norm_var": 0.12031814158410435, |
| "learning_rate": 0.1, |
| "loss": 2.849, |
| "loss/crossentropy": 2.613959550857544, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23499983549118042, |
| "loss/reg": 0.1509951651096344, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.0081, |
| "grad_norm": 1.6564141511917114, |
| "grad_norm_var": 0.12071302651513444, |
| "learning_rate": 0.1, |
| "loss": 2.7404, |
| "loss/crossentropy": 2.5304181575775146, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20995372533798218, |
| "loss/reg": 0.15104807913303375, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.00811, |
| "grad_norm": 4.6763386726379395, |
| "grad_norm_var": 0.5745978658779629, |
| "learning_rate": 0.1, |
| "loss": 2.6587, |
| "loss/crossentropy": 2.4552841186523438, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20337538421154022, |
| "loss/reg": 0.1511099636554718, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.00812, |
| "grad_norm": 1.8454904556274414, |
| "grad_norm_var": 0.5677518679251001, |
| "learning_rate": 0.1, |
| "loss": 3.0383, |
| "loss/crossentropy": 2.8115780353546143, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22671402990818024, |
| "loss/reg": 0.15108677744865417, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.00813, |
| "grad_norm": 1.9565349817276, |
| "grad_norm_var": 0.5260687934264286, |
| "learning_rate": 0.1, |
| "loss": 2.8743, |
| "loss/crossentropy": 2.644120693206787, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23013320565223694, |
| "loss/reg": 0.15114806592464447, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.00814, |
| "grad_norm": 1.7298771142959595, |
| "grad_norm_var": 0.5270702188469062, |
| "learning_rate": 0.1, |
| "loss": 2.7379, |
| "loss/crossentropy": 2.515655040740967, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22226950526237488, |
| "loss/reg": 0.15109556913375854, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.00815, |
| "grad_norm": 1.992189645767212, |
| "grad_norm_var": 0.5235200404632129, |
| "learning_rate": 0.1, |
| "loss": 2.8157, |
| "loss/crossentropy": 2.569967269897461, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2456885725259781, |
| "loss/reg": 0.15117162466049194, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.00816, |
| "grad_norm": 2.056793451309204, |
| "grad_norm_var": 0.5230922669948233, |
| "learning_rate": 0.1, |
| "loss": 2.3698, |
| "loss/crossentropy": 2.1685283184051514, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20122523605823517, |
| "loss/reg": 0.151008740067482, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.00817, |
| "grad_norm": 2.5353429317474365, |
| "grad_norm_var": 0.5288335670571456, |
| "learning_rate": 0.1, |
| "loss": 2.7977, |
| "loss/crossentropy": 2.574751138687134, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2229909896850586, |
| "loss/reg": 0.15113280713558197, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.00818, |
| "grad_norm": 1.75123929977417, |
| "grad_norm_var": 0.5296072014646189, |
| "learning_rate": 0.1, |
| "loss": 2.6085, |
| "loss/crossentropy": 2.4087445735931396, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19974830746650696, |
| "loss/reg": 0.1512545943260193, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.00819, |
| "grad_norm": 1.7375423908233643, |
| "grad_norm_var": 0.5366742888750896, |
| "learning_rate": 0.1, |
| "loss": 2.7095, |
| "loss/crossentropy": 2.493342161178589, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2161201387643814, |
| "loss/reg": 0.15114019811153412, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.0082, |
| "grad_norm": 1.8614202737808228, |
| "grad_norm_var": 0.5313444324762703, |
| "learning_rate": 0.1, |
| "loss": 2.8116, |
| "loss/crossentropy": 2.5902342796325684, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22135406732559204, |
| "loss/reg": 0.15120580792427063, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.00821, |
| "grad_norm": 2.0150632858276367, |
| "grad_norm_var": 0.5245532717891043, |
| "learning_rate": 0.1, |
| "loss": 2.6546, |
| "loss/crossentropy": 2.4433276653289795, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2113126665353775, |
| "loss/reg": 0.15116269886493683, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.00822, |
| "grad_norm": 1.8355910778045654, |
| "grad_norm_var": 0.5225867322268717, |
| "learning_rate": 0.1, |
| "loss": 2.8317, |
| "loss/crossentropy": 2.5836408138275146, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2481069713830948, |
| "loss/reg": 0.1511426866054535, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.00823, |
| "grad_norm": 1.7227540016174316, |
| "grad_norm_var": 0.5250418254906357, |
| "learning_rate": 0.1, |
| "loss": 2.7702, |
| "loss/crossentropy": 2.5352981090545654, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2348913699388504, |
| "loss/reg": 0.1512099802494049, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.00824, |
| "grad_norm": 1.9596350193023682, |
| "grad_norm_var": 0.521092383786887, |
| "learning_rate": 0.1, |
| "loss": 2.73, |
| "loss/crossentropy": 2.5147976875305176, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21518126130104065, |
| "loss/reg": 0.1511077731847763, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.00825, |
| "grad_norm": 1.9192780256271362, |
| "grad_norm_var": 0.5222529251715352, |
| "learning_rate": 0.1, |
| "loss": 2.6439, |
| "loss/crossentropy": 2.453402280807495, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19046884775161743, |
| "loss/reg": 0.15114519000053406, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.00826, |
| "grad_norm": 1.8421896696090698, |
| "grad_norm_var": 0.5139618226573498, |
| "learning_rate": 0.1, |
| "loss": 2.6051, |
| "loss/crossentropy": 2.3816077709198, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22347375750541687, |
| "loss/reg": 0.15116377174854279, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.00827, |
| "grad_norm": 1.6365138292312622, |
| "grad_norm_var": 0.04315749649713491, |
| "learning_rate": 0.1, |
| "loss": 2.7266, |
| "loss/crossentropy": 2.513753890991211, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2128257304430008, |
| "loss/reg": 0.15111923217773438, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.00828, |
| "grad_norm": 2.278167724609375, |
| "grad_norm_var": 0.05172260031205624, |
| "learning_rate": 0.1, |
| "loss": 2.7524, |
| "loss/crossentropy": 2.4926528930664062, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2597881853580475, |
| "loss/reg": 0.15110453963279724, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.00829, |
| "grad_norm": 1.6420785188674927, |
| "grad_norm_var": 0.05665955757236603, |
| "learning_rate": 0.1, |
| "loss": 2.6878, |
| "loss/crossentropy": 2.4756946563720703, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21211113035678864, |
| "loss/reg": 0.15128032863140106, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.0083, |
| "grad_norm": 1.8594530820846558, |
| "grad_norm_var": 0.05464484275106732, |
| "learning_rate": 0.1, |
| "loss": 2.6473, |
| "loss/crossentropy": 2.4377284049987793, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20952263474464417, |
| "loss/reg": 0.15119966864585876, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.00831, |
| "grad_norm": 1.7403098344802856, |
| "grad_norm_var": 0.05602874700607566, |
| "learning_rate": 0.1, |
| "loss": 2.5931, |
| "loss/crossentropy": 2.3897907733917236, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20331606268882751, |
| "loss/reg": 0.15102870762348175, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.00832, |
| "grad_norm": 1.7482370138168335, |
| "grad_norm_var": 0.055511530503576975, |
| "learning_rate": 0.1, |
| "loss": 2.5933, |
| "loss/crossentropy": 2.364558219909668, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22875171899795532, |
| "loss/reg": 0.15111705660820007, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.00833, |
| "grad_norm": 1.7681249380111694, |
| "grad_norm_var": 0.02529250176376312, |
| "learning_rate": 0.1, |
| "loss": 2.5516, |
| "loss/crossentropy": 2.323167085647583, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22844187915325165, |
| "loss/reg": 0.1512153595685959, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.00834, |
| "grad_norm": 1.795610785484314, |
| "grad_norm_var": 0.024935687335108505, |
| "learning_rate": 0.1, |
| "loss": 2.4877, |
| "loss/crossentropy": 2.2634117603302, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22430318593978882, |
| "loss/reg": 0.15116415917873383, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.00835, |
| "grad_norm": 1.7789329290390015, |
| "grad_norm_var": 0.02450423851083432, |
| "learning_rate": 0.1, |
| "loss": 2.6968, |
| "loss/crossentropy": 2.483093023300171, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21367734670639038, |
| "loss/reg": 0.15126685798168182, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.00836, |
| "grad_norm": 1.782604694366455, |
| "grad_norm_var": 0.024643316793792756, |
| "learning_rate": 0.1, |
| "loss": 2.5427, |
| "loss/crossentropy": 2.3241066932678223, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2186134159564972, |
| "loss/reg": 0.15126672387123108, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.00837, |
| "grad_norm": 1.6407171487808228, |
| "grad_norm_var": 0.024303684661197106, |
| "learning_rate": 0.1, |
| "loss": 2.474, |
| "loss/crossentropy": 2.2508771419525146, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2230987697839737, |
| "loss/reg": 0.15124686062335968, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.00838, |
| "grad_norm": 2.041853189468384, |
| "grad_norm_var": 0.027683331970469224, |
| "learning_rate": 0.1, |
| "loss": 2.6215, |
| "loss/crossentropy": 2.3715789318084717, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24992021918296814, |
| "loss/reg": 0.15131480991840363, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.00839, |
| "grad_norm": 1.6789854764938354, |
| "grad_norm_var": 0.028383869214936415, |
| "learning_rate": 0.1, |
| "loss": 2.7312, |
| "loss/crossentropy": 2.528635263442993, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20260530710220337, |
| "loss/reg": 0.15123650431632996, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.0084, |
| "grad_norm": 1.816537618637085, |
| "grad_norm_var": 0.026990770795224724, |
| "learning_rate": 0.1, |
| "loss": 2.6328, |
| "loss/crossentropy": 2.395319700241089, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23751682043075562, |
| "loss/reg": 0.15121954679489136, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.00841, |
| "grad_norm": 1.7385938167572021, |
| "grad_norm_var": 0.026412999362490492, |
| "learning_rate": 0.1, |
| "loss": 2.7431, |
| "loss/crossentropy": 2.533646583557129, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20940619707107544, |
| "loss/reg": 0.15118320286273956, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.00842, |
| "grad_norm": 1.770753264427185, |
| "grad_norm_var": 0.026323494663344545, |
| "learning_rate": 0.1, |
| "loss": 2.8632, |
| "loss/crossentropy": 2.6413209438323975, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2218940407037735, |
| "loss/reg": 0.15128572285175323, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.00843, |
| "grad_norm": 1.7150920629501343, |
| "grad_norm_var": 0.025050582384037946, |
| "learning_rate": 0.1, |
| "loss": 2.4733, |
| "loss/crossentropy": 2.2579855918884277, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21534618735313416, |
| "loss/reg": 0.15136608481407166, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.00844, |
| "grad_norm": 1.7103124856948853, |
| "grad_norm_var": 0.00898161735115437, |
| "learning_rate": 0.1, |
| "loss": 2.4951, |
| "loss/crossentropy": 2.278857707977295, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21619471907615662, |
| "loss/reg": 0.1513548195362091, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.00845, |
| "grad_norm": 1.7302014827728271, |
| "grad_norm_var": 0.008031344597588409, |
| "learning_rate": 0.1, |
| "loss": 2.7277, |
| "loss/crossentropy": 2.510094165802002, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21760627627372742, |
| "loss/reg": 0.1514042615890503, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.00846, |
| "grad_norm": 1.740657925605774, |
| "grad_norm_var": 0.007492840395171581, |
| "learning_rate": 0.1, |
| "loss": 2.7357, |
| "loss/crossentropy": 2.5123136043548584, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22340027987957, |
| "loss/reg": 0.1514188051223755, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.00847, |
| "grad_norm": 1.72589910030365, |
| "grad_norm_var": 0.007548159339368432, |
| "learning_rate": 0.1, |
| "loss": 2.41, |
| "loss/crossentropy": 2.196450710296631, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21352747082710266, |
| "loss/reg": 0.15134373307228088, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.00848, |
| "grad_norm": 1.6422573328018188, |
| "grad_norm_var": 0.008436771868459363, |
| "learning_rate": 0.1, |
| "loss": 2.5333, |
| "loss/crossentropy": 2.326202392578125, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20708459615707397, |
| "loss/reg": 0.15121881663799286, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.00849, |
| "grad_norm": 1.7320435047149658, |
| "grad_norm_var": 0.008454134853455269, |
| "learning_rate": 0.1, |
| "loss": 2.8692, |
| "loss/crossentropy": 2.6406683921813965, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22856499254703522, |
| "loss/reg": 0.15134045481681824, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.0085, |
| "grad_norm": 2.1380763053894043, |
| "grad_norm_var": 0.017749822432567855, |
| "learning_rate": 0.1, |
| "loss": 2.8765, |
| "loss/crossentropy": 2.656029462814331, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22050921618938446, |
| "loss/reg": 0.1511954516172409, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.00851, |
| "grad_norm": 1.709833025932312, |
| "grad_norm_var": 0.018002521208306574, |
| "learning_rate": 0.1, |
| "loss": 2.7476, |
| "loss/crossentropy": 2.5320911407470703, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21548867225646973, |
| "loss/reg": 0.15135495364665985, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.00852, |
| "grad_norm": 1.937472939491272, |
| "grad_norm_var": 0.01976901102182159, |
| "learning_rate": 0.1, |
| "loss": 2.9912, |
| "loss/crossentropy": 2.7569828033447266, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23421522974967957, |
| "loss/reg": 0.1513519138097763, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.00853, |
| "grad_norm": 2.222175359725952, |
| "grad_norm_var": 0.03015348778060621, |
| "learning_rate": 0.1, |
| "loss": 2.6129, |
| "loss/crossentropy": 2.363703966140747, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24919304251670837, |
| "loss/reg": 0.15148600935935974, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.00854, |
| "grad_norm": 1.7465494871139526, |
| "grad_norm_var": 0.026698118555355185, |
| "learning_rate": 0.1, |
| "loss": 2.8858, |
| "loss/crossentropy": 2.6605887413024902, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22522537410259247, |
| "loss/reg": 0.15130510926246643, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.00855, |
| "grad_norm": 1.9415252208709717, |
| "grad_norm_var": 0.026867400939983514, |
| "learning_rate": 0.1, |
| "loss": 2.883, |
| "loss/crossentropy": 2.666317939758301, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21669834852218628, |
| "loss/reg": 0.15133148431777954, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.00856, |
| "grad_norm": 1.7174979448318481, |
| "grad_norm_var": 0.027441976791978344, |
| "learning_rate": 0.1, |
| "loss": 2.8902, |
| "loss/crossentropy": 2.652575731277466, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23762255907058716, |
| "loss/reg": 0.15132835507392883, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.00857, |
| "grad_norm": 2.257584810256958, |
| "grad_norm_var": 0.03951280884078467, |
| "learning_rate": 0.1, |
| "loss": 3.0667, |
| "loss/crossentropy": 2.812617778778076, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.25404849648475647, |
| "loss/reg": 0.1512172669172287, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.00858, |
| "grad_norm": 1.7933690547943115, |
| "grad_norm_var": 0.03933635637457318, |
| "learning_rate": 0.1, |
| "loss": 2.4895, |
| "loss/crossentropy": 2.261209011077881, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22829587757587433, |
| "loss/reg": 0.15143099427223206, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.00859, |
| "grad_norm": 1.9528422355651855, |
| "grad_norm_var": 0.03886888259120056, |
| "learning_rate": 0.1, |
| "loss": 2.8587, |
| "loss/crossentropy": 2.617570161819458, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24113376438617706, |
| "loss/reg": 0.1512656807899475, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.0086, |
| "grad_norm": 1.7308269739151, |
| "grad_norm_var": 0.038496298493225584, |
| "learning_rate": 0.1, |
| "loss": 2.2898, |
| "loss/crossentropy": 2.0933334827423096, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19646556675434113, |
| "loss/reg": 0.15130573511123657, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.00861, |
| "grad_norm": 1.6955910921096802, |
| "grad_norm_var": 0.03915827035473942, |
| "learning_rate": 0.1, |
| "loss": 2.7603, |
| "loss/crossentropy": 2.5640573501586914, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19625869393348694, |
| "loss/reg": 0.15135689079761505, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.00862, |
| "grad_norm": 1.5804342031478882, |
| "grad_norm_var": 0.043211067223693965, |
| "learning_rate": 0.1, |
| "loss": 2.601, |
| "loss/crossentropy": 2.393148899078369, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20789092779159546, |
| "loss/reg": 0.15140031278133392, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.00863, |
| "grad_norm": 1.6909512281417847, |
| "grad_norm_var": 0.04384353693056457, |
| "learning_rate": 0.1, |
| "loss": 2.8534, |
| "loss/crossentropy": 2.635213613510132, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21817569434642792, |
| "loss/reg": 0.15125516057014465, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.00864, |
| "grad_norm": 1.865833044052124, |
| "grad_norm_var": 0.04098158978652009, |
| "learning_rate": 0.1, |
| "loss": 2.677, |
| "loss/crossentropy": 2.451869249343872, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2251804918050766, |
| "loss/reg": 0.1512734442949295, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.00865, |
| "grad_norm": 1.6002877950668335, |
| "grad_norm_var": 0.044262392792282355, |
| "learning_rate": 0.1, |
| "loss": 2.547, |
| "loss/crossentropy": 2.3392629623413086, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20770519971847534, |
| "loss/reg": 0.1514132022857666, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.00866, |
| "grad_norm": 1.687819004058838, |
| "grad_norm_var": 0.03956680795968162, |
| "learning_rate": 0.1, |
| "loss": 2.5192, |
| "loss/crossentropy": 2.297617197036743, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2215721309185028, |
| "loss/reg": 0.1514103263616562, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.00867, |
| "grad_norm": 1.9280664920806885, |
| "grad_norm_var": 0.0393185419167357, |
| "learning_rate": 0.1, |
| "loss": 2.4141, |
| "loss/crossentropy": 2.199082374572754, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21497201919555664, |
| "loss/reg": 0.1515020728111267, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.00868, |
| "grad_norm": 1.854785442352295, |
| "grad_norm_var": 0.03860840521024465, |
| "learning_rate": 0.1, |
| "loss": 2.5799, |
| "loss/crossentropy": 2.3372514247894287, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24268124997615814, |
| "loss/reg": 0.1514066755771637, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.00869, |
| "grad_norm": 1.8025046586990356, |
| "grad_norm_var": 0.02762304859997068, |
| "learning_rate": 0.1, |
| "loss": 2.3223, |
| "loss/crossentropy": 2.07796573638916, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24431529641151428, |
| "loss/reg": 0.15148381888866425, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.0087, |
| "grad_norm": 1.789805293083191, |
| "grad_norm_var": 0.02741496714067632, |
| "learning_rate": 0.1, |
| "loss": 2.6111, |
| "loss/crossentropy": 2.3823256492614746, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22881478071212769, |
| "loss/reg": 0.15135745704174042, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.00871, |
| "grad_norm": 1.7687875032424927, |
| "grad_norm_var": 0.026149452966385843, |
| "learning_rate": 0.1, |
| "loss": 2.7208, |
| "loss/crossentropy": 2.4916510581970215, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2291553020477295, |
| "loss/reg": 0.1514526754617691, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.00872, |
| "grad_norm": 1.9768033027648926, |
| "grad_norm_var": 0.027678858713760707, |
| "learning_rate": 0.1, |
| "loss": 2.9109, |
| "loss/crossentropy": 2.6735622882843018, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23734332621097565, |
| "loss/reg": 0.15148624777793884, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.00873, |
| "grad_norm": 1.786848545074463, |
| "grad_norm_var": 0.013499722065898586, |
| "learning_rate": 0.1, |
| "loss": 2.8304, |
| "loss/crossentropy": 2.617002248764038, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21341702342033386, |
| "loss/reg": 0.15146276354789734, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.00874, |
| "grad_norm": 1.6221749782562256, |
| "grad_norm_var": 0.01506273335111222, |
| "learning_rate": 0.1, |
| "loss": 2.6555, |
| "loss/crossentropy": 2.4505650997161865, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2049349993467331, |
| "loss/reg": 0.15147390961647034, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.00875, |
| "grad_norm": 1.7050273418426514, |
| "grad_norm_var": 0.01288918552360201, |
| "learning_rate": 0.1, |
| "loss": 2.7407, |
| "loss/crossentropy": 2.516087532043457, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2246524691581726, |
| "loss/reg": 0.15143616497516632, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.00876, |
| "grad_norm": 1.6903328895568848, |
| "grad_norm_var": 0.013124395732477426, |
| "learning_rate": 0.1, |
| "loss": 2.6824, |
| "loss/crossentropy": 2.458852529525757, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.223505437374115, |
| "loss/reg": 0.1514623463153839, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.00877, |
| "grad_norm": 2.233574151992798, |
| "grad_norm_var": 0.02710423348116251, |
| "learning_rate": 0.1, |
| "loss": 2.8475, |
| "loss/crossentropy": 2.629608631134033, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2179149091243744, |
| "loss/reg": 0.1514747589826584, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.00878, |
| "grad_norm": 1.9560211896896362, |
| "grad_norm_var": 0.025601302674121484, |
| "learning_rate": 0.1, |
| "loss": 2.4437, |
| "loss/crossentropy": 2.1911513805389404, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.25255662202835083, |
| "loss/reg": 0.15144562721252441, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.00879, |
| "grad_norm": 1.7304699420928955, |
| "grad_norm_var": 0.02507174767371693, |
| "learning_rate": 0.1, |
| "loss": 2.7564, |
| "loss/crossentropy": 2.512324571609497, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2440732717514038, |
| "loss/reg": 0.15142442286014557, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.0088, |
| "grad_norm": 1.745699405670166, |
| "grad_norm_var": 0.02511861496650211, |
| "learning_rate": 0.1, |
| "loss": 2.6068, |
| "loss/crossentropy": 2.3948464393615723, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21190547943115234, |
| "loss/reg": 0.15150921046733856, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.00881, |
| "grad_norm": 1.6994541883468628, |
| "grad_norm_var": 0.02302731533941268, |
| "learning_rate": 0.1, |
| "loss": 2.7019, |
| "loss/crossentropy": 2.4910151958465576, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21086427569389343, |
| "loss/reg": 0.15154525637626648, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.00882, |
| "grad_norm": 1.6602249145507812, |
| "grad_norm_var": 0.023528613929288098, |
| "learning_rate": 0.1, |
| "loss": 2.7721, |
| "loss/crossentropy": 2.554807424545288, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2173016369342804, |
| "loss/reg": 0.1514502912759781, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.00883, |
| "grad_norm": 2.0081701278686523, |
| "grad_norm_var": 0.02519694630483252, |
| "learning_rate": 0.1, |
| "loss": 2.4442, |
| "loss/crossentropy": 2.210256814956665, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23390710353851318, |
| "loss/reg": 0.15167531371116638, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.00884, |
| "grad_norm": 1.7718380689620972, |
| "grad_norm_var": 0.025180510350448295, |
| "learning_rate": 0.1, |
| "loss": 2.6711, |
| "loss/crossentropy": 2.448080539703369, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22303438186645508, |
| "loss/reg": 0.15152262151241302, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.00885, |
| "grad_norm": 2.6652657985687256, |
| "grad_norm_var": 0.0709287547339845, |
| "learning_rate": 0.1, |
| "loss": 2.869, |
| "loss/crossentropy": 2.6223933696746826, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24658727645874023, |
| "loss/reg": 0.1515502780675888, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.00886, |
| "grad_norm": 1.5742336511611938, |
| "grad_norm_var": 0.07594151445982404, |
| "learning_rate": 0.1, |
| "loss": 2.7666, |
| "loss/crossentropy": 2.5660595893859863, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20056506991386414, |
| "loss/reg": 0.15154889225959778, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.00887, |
| "grad_norm": 1.7220720052719116, |
| "grad_norm_var": 0.07658178624578836, |
| "learning_rate": 0.1, |
| "loss": 2.7775, |
| "loss/crossentropy": 2.5574543476104736, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2200058102607727, |
| "loss/reg": 0.151584193110466, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.00888, |
| "grad_norm": 2.004910469055176, |
| "grad_norm_var": 0.07711850338153561, |
| "learning_rate": 0.1, |
| "loss": 2.6048, |
| "loss/crossentropy": 2.360004186630249, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24479365348815918, |
| "loss/reg": 0.15153487026691437, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.00889, |
| "grad_norm": 1.7439953088760376, |
| "grad_norm_var": 0.07758565372948283, |
| "learning_rate": 0.1, |
| "loss": 2.8182, |
| "loss/crossentropy": 2.6062731742858887, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21197015047073364, |
| "loss/reg": 0.1515517383813858, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.0089, |
| "grad_norm": 1.6648614406585693, |
| "grad_norm_var": 0.07642653261261216, |
| "learning_rate": 0.1, |
| "loss": 2.6548, |
| "loss/crossentropy": 2.445342540740967, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20947742462158203, |
| "loss/reg": 0.15148381888866425, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.00891, |
| "grad_norm": 1.7586262226104736, |
| "grad_norm_var": 0.07558068786277942, |
| "learning_rate": 0.1, |
| "loss": 2.7963, |
| "loss/crossentropy": 2.5676562786102295, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22868549823760986, |
| "loss/reg": 0.15150345861911774, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.00892, |
| "grad_norm": 1.8258134126663208, |
| "grad_norm_var": 0.07381004817657502, |
| "learning_rate": 0.1, |
| "loss": 2.9439, |
| "loss/crossentropy": 2.7113091945648193, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23255756497383118, |
| "loss/reg": 0.15162353217601776, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.00893, |
| "grad_norm": 1.6332712173461914, |
| "grad_norm_var": 0.06645791740460437, |
| "learning_rate": 0.1, |
| "loss": 2.5697, |
| "loss/crossentropy": 2.358527421951294, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21113955974578857, |
| "loss/reg": 0.15160778164863586, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.00894, |
| "grad_norm": 1.6393007040023804, |
| "grad_norm_var": 0.06710189453430401, |
| "learning_rate": 0.1, |
| "loss": 2.4303, |
| "loss/crossentropy": 2.2129173278808594, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2173955887556076, |
| "loss/reg": 0.15161573886871338, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.00895, |
| "grad_norm": 1.7449733018875122, |
| "grad_norm_var": 0.0669747589760218, |
| "learning_rate": 0.1, |
| "loss": 2.5455, |
| "loss/crossentropy": 2.3201189041137695, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22542770206928253, |
| "loss/reg": 0.15144580602645874, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.00896, |
| "grad_norm": 1.7077760696411133, |
| "grad_norm_var": 0.06735903132595217, |
| "learning_rate": 0.1, |
| "loss": 2.7518, |
| "loss/crossentropy": 2.5348668098449707, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21689267456531525, |
| "loss/reg": 0.15164367854595184, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.00897, |
| "grad_norm": 1.9489948749542236, |
| "grad_norm_var": 0.06785402701448688, |
| "learning_rate": 0.1, |
| "loss": 2.8352, |
| "loss/crossentropy": 2.588930368423462, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.246311217546463, |
| "loss/reg": 0.15172646939754486, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.00898, |
| "grad_norm": 1.7753548622131348, |
| "grad_norm_var": 0.0662736244574124, |
| "learning_rate": 0.1, |
| "loss": 2.7775, |
| "loss/crossentropy": 2.5519156455993652, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22554878890514374, |
| "loss/reg": 0.15159955620765686, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.00899, |
| "grad_norm": 1.9766134023666382, |
| "grad_norm_var": 0.06556239133372326, |
| "learning_rate": 0.1, |
| "loss": 2.6141, |
| "loss/crossentropy": 2.3787364959716797, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23535825312137604, |
| "loss/reg": 0.1516241729259491, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.009, |
| "grad_norm": 3.171576976776123, |
| "grad_norm_var": 0.17858606030574573, |
| "learning_rate": 0.1, |
| "loss": 3.3393, |
| "loss/crossentropy": 3.0183300971984863, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.3209747076034546, |
| "loss/reg": 0.1516529619693756, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.00901, |
| "grad_norm": 1.7961993217468262, |
| "grad_norm_var": 0.1382569255606156, |
| "learning_rate": 0.1, |
| "loss": 2.6012, |
| "loss/crossentropy": 2.3923985958099365, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20876869559288025, |
| "loss/reg": 0.15166591107845306, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.00902, |
| "grad_norm": 1.7802621126174927, |
| "grad_norm_var": 0.13318240787890634, |
| "learning_rate": 0.1, |
| "loss": 3.0413, |
| "loss/crossentropy": 2.827460527420044, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21379590034484863, |
| "loss/reg": 0.1518196165561676, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.00903, |
| "grad_norm": 1.6559505462646484, |
| "grad_norm_var": 0.13474582804570617, |
| "learning_rate": 0.1, |
| "loss": 2.2953, |
| "loss/crossentropy": 2.102956533432007, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19235935807228088, |
| "loss/reg": 0.1516178846359253, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.00904, |
| "grad_norm": 2.3716273307800293, |
| "grad_norm_var": 0.15002711441307498, |
| "learning_rate": 0.1, |
| "loss": 3.0816, |
| "loss/crossentropy": 2.8295743465423584, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.25203806161880493, |
| "loss/reg": 0.1517859399318695, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.00905, |
| "grad_norm": 1.7225195169448853, |
| "grad_norm_var": 0.1504659973841721, |
| "learning_rate": 0.1, |
| "loss": 2.6744, |
| "loss/crossentropy": 2.46315336227417, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21127071976661682, |
| "loss/reg": 0.15176860988140106, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.00906, |
| "grad_norm": 1.7127317190170288, |
| "grad_norm_var": 0.14919866719851071, |
| "learning_rate": 0.1, |
| "loss": 2.4911, |
| "loss/crossentropy": 2.2734830379486084, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2176201194524765, |
| "loss/reg": 0.1515757441520691, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.00907, |
| "grad_norm": 1.828104853630066, |
| "grad_norm_var": 0.14829400777336801, |
| "learning_rate": 0.1, |
| "loss": 2.9355, |
| "loss/crossentropy": 2.720423936843872, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2150929570198059, |
| "loss/reg": 0.15179337561130524, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.00908, |
| "grad_norm": 1.798911213874817, |
| "grad_norm_var": 0.1485809246804628, |
| "learning_rate": 0.1, |
| "loss": 2.6175, |
| "loss/crossentropy": 2.384119987487793, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23336076736450195, |
| "loss/reg": 0.1517447531223297, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.00909, |
| "grad_norm": 1.6159745454788208, |
| "grad_norm_var": 0.14919518046391378, |
| "learning_rate": 0.1, |
| "loss": 2.6471, |
| "loss/crossentropy": 2.4449269771575928, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20216551423072815, |
| "loss/reg": 0.1517157405614853, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.0091, |
| "grad_norm": 1.8443286418914795, |
| "grad_norm_var": 0.144957337469526, |
| "learning_rate": 0.1, |
| "loss": 2.4973, |
| "loss/crossentropy": 2.26932692527771, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22793231904506683, |
| "loss/reg": 0.15166865289211273, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.00911, |
| "grad_norm": 1.828259825706482, |
| "grad_norm_var": 0.14363330555165324, |
| "learning_rate": 0.1, |
| "loss": 2.7195, |
| "loss/crossentropy": 2.4848718643188477, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23460876941680908, |
| "loss/reg": 0.15173785388469696, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.00912, |
| "grad_norm": 2.4518802165985107, |
| "grad_norm_var": 0.1583294393605397, |
| "learning_rate": 0.1, |
| "loss": 2.7125, |
| "loss/crossentropy": 2.4464292526245117, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.266059935092926, |
| "loss/reg": 0.15187551081180573, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.00913, |
| "grad_norm": 1.7201437950134277, |
| "grad_norm_var": 0.1617846235466421, |
| "learning_rate": 0.1, |
| "loss": 2.5454, |
| "loss/crossentropy": 2.3176207542419434, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2278132140636444, |
| "loss/reg": 0.1516762524843216, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.00914, |
| "grad_norm": 1.850110650062561, |
| "grad_norm_var": 0.1604863069633637, |
| "learning_rate": 0.1, |
| "loss": 2.5676, |
| "loss/crossentropy": 2.3167519569396973, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.25087612867355347, |
| "loss/reg": 0.15167440474033356, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.00915, |
| "grad_norm": 1.7149795293807983, |
| "grad_norm_var": 0.1636730822726955, |
| "learning_rate": 0.1, |
| "loss": 2.4139, |
| "loss/crossentropy": 2.200120449066162, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21378323435783386, |
| "loss/reg": 0.15190301835536957, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.00916, |
| "grad_norm": 1.897722601890564, |
| "grad_norm_var": 0.054039198047092896, |
| "learning_rate": 0.1, |
| "loss": 2.7554, |
| "loss/crossentropy": 2.523404598236084, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2319614142179489, |
| "loss/reg": 0.1518702208995819, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.00917, |
| "grad_norm": 1.9186159372329712, |
| "grad_norm_var": 0.05410816689822765, |
| "learning_rate": 0.1, |
| "loss": 2.9598, |
| "loss/crossentropy": 2.717954397201538, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24185343086719513, |
| "loss/reg": 0.15173277258872986, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.00918, |
| "grad_norm": 1.772187352180481, |
| "grad_norm_var": 0.05419486896133675, |
| "learning_rate": 0.1, |
| "loss": 2.8165, |
| "loss/crossentropy": 2.578883171081543, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23761451244354248, |
| "loss/reg": 0.1518423855304718, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.00919, |
| "grad_norm": 1.8386093378067017, |
| "grad_norm_var": 0.05139577732893083, |
| "learning_rate": 0.1, |
| "loss": 2.7472, |
| "loss/crossentropy": 2.526249885559082, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22093181312084198, |
| "loss/reg": 0.1517849713563919, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.0092, |
| "grad_norm": 1.8304262161254883, |
| "grad_norm_var": 0.03335428855927087, |
| "learning_rate": 0.1, |
| "loss": 2.5102, |
| "loss/crossentropy": 2.2870755195617676, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2231561243534088, |
| "loss/reg": 0.15178850293159485, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.00921, |
| "grad_norm": 1.6935133934020996, |
| "grad_norm_var": 0.03383838616345442, |
| "learning_rate": 0.1, |
| "loss": 2.9663, |
| "loss/crossentropy": 2.7505202293395996, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21575099229812622, |
| "loss/reg": 0.151752308011055, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.00922, |
| "grad_norm": 1.726082444190979, |
| "grad_norm_var": 0.033636716574486476, |
| "learning_rate": 0.1, |
| "loss": 2.7678, |
| "loss/crossentropy": 2.555203914642334, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21259337663650513, |
| "loss/reg": 0.15180544555187225, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.00923, |
| "grad_norm": 1.9385335445404053, |
| "grad_norm_var": 0.034325094327211286, |
| "learning_rate": 0.1, |
| "loss": 2.5126, |
| "loss/crossentropy": 2.274322509765625, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23827750980854034, |
| "loss/reg": 0.15187132358551025, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.00924, |
| "grad_norm": 1.7582217454910278, |
| "grad_norm_var": 0.0346515835142912, |
| "learning_rate": 0.1, |
| "loss": 2.5267, |
| "loss/crossentropy": 2.2920174598693848, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23463678359985352, |
| "loss/reg": 0.1518162190914154, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.00925, |
| "grad_norm": 1.6704492568969727, |
| "grad_norm_var": 0.033228233429720845, |
| "learning_rate": 0.1, |
| "loss": 2.838, |
| "loss/crossentropy": 2.6272783279418945, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21072882413864136, |
| "loss/reg": 0.15173053741455078, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.00926, |
| "grad_norm": 1.5782722234725952, |
| "grad_norm_var": 0.03752998737509922, |
| "learning_rate": 0.1, |
| "loss": 2.6325, |
| "loss/crossentropy": 2.4263346195220947, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20614410936832428, |
| "loss/reg": 0.15190660953521729, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.00927, |
| "grad_norm": 1.894101619720459, |
| "grad_norm_var": 0.03783613119460553, |
| "learning_rate": 0.1, |
| "loss": 2.8579, |
| "loss/crossentropy": 2.6345279216766357, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.223392516374588, |
| "loss/reg": 0.15189293026924133, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.00928, |
| "grad_norm": 2.1781187057495117, |
| "grad_norm_var": 0.019760976634764362, |
| "learning_rate": 0.1, |
| "loss": 2.7061, |
| "loss/crossentropy": 2.4536638259887695, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2523956894874573, |
| "loss/reg": 0.1518169641494751, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.00929, |
| "grad_norm": 1.7708848714828491, |
| "grad_norm_var": 0.019305478406016617, |
| "learning_rate": 0.1, |
| "loss": 2.6017, |
| "loss/crossentropy": 2.385331153869629, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21638329327106476, |
| "loss/reg": 0.15177251398563385, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.0093, |
| "grad_norm": 1.8832210302352905, |
| "grad_norm_var": 0.019531530924202182, |
| "learning_rate": 0.1, |
| "loss": 2.3697, |
| "loss/crossentropy": 2.1488101482391357, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22087591886520386, |
| "loss/reg": 0.15202932059764862, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.00931, |
| "grad_norm": 1.5858062505722046, |
| "grad_norm_var": 0.022322822237683938, |
| "learning_rate": 0.1, |
| "loss": 2.3714, |
| "loss/crossentropy": 2.1723973751068115, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1990160495042801, |
| "loss/reg": 0.15185227990150452, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.00932, |
| "grad_norm": 1.7634512186050415, |
| "grad_norm_var": 0.021850903390696886, |
| "learning_rate": 0.1, |
| "loss": 2.7311, |
| "loss/crossentropy": 2.5095791816711426, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2215052843093872, |
| "loss/reg": 0.1519194096326828, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.00933, |
| "grad_norm": 1.6721258163452148, |
| "grad_norm_var": 0.02175090246909542, |
| "learning_rate": 0.1, |
| "loss": 2.8286, |
| "loss/crossentropy": 2.6169848442077637, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21162506937980652, |
| "loss/reg": 0.1519974172115326, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.00934, |
| "grad_norm": 1.8768491744995117, |
| "grad_norm_var": 0.02226196289900576, |
| "learning_rate": 0.1, |
| "loss": 3.1066, |
| "loss/crossentropy": 2.8633530139923096, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24328918755054474, |
| "loss/reg": 0.1519625335931778, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.00935, |
| "grad_norm": 2.065946340560913, |
| "grad_norm_var": 0.026930157892601336, |
| "learning_rate": 0.1, |
| "loss": 2.9589, |
| "loss/crossentropy": 2.7255752086639404, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23329448699951172, |
| "loss/reg": 0.1518731713294983, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.00936, |
| "grad_norm": 1.7530063390731812, |
| "grad_norm_var": 0.027046180269890623, |
| "learning_rate": 0.1, |
| "loss": 2.5289, |
| "loss/crossentropy": 2.3079566955566406, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22095265984535217, |
| "loss/reg": 0.15193763375282288, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.00937, |
| "grad_norm": 1.751245141029358, |
| "grad_norm_var": 0.02643067248569929, |
| "learning_rate": 0.1, |
| "loss": 2.8463, |
| "loss/crossentropy": 2.6235086917877197, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22283971309661865, |
| "loss/reg": 0.1518317610025406, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.00938, |
| "grad_norm": 1.9708318710327148, |
| "grad_norm_var": 0.0276271383923356, |
| "learning_rate": 0.1, |
| "loss": 2.6227, |
| "loss/crossentropy": 2.3812296390533447, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24146738648414612, |
| "loss/reg": 0.15186211466789246, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.00939, |
| "grad_norm": 1.7769073247909546, |
| "grad_norm_var": 0.026693376694053406, |
| "learning_rate": 0.1, |
| "loss": 2.5096, |
| "loss/crossentropy": 2.2793614864349365, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23025177419185638, |
| "loss/reg": 0.1519736647605896, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.0094, |
| "grad_norm": 1.971938133239746, |
| "grad_norm_var": 0.02809140583977765, |
| "learning_rate": 0.1, |
| "loss": 2.6183, |
| "loss/crossentropy": 2.361912727355957, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2563827633857727, |
| "loss/reg": 0.15198439359664917, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.00941, |
| "grad_norm": 1.734709620475769, |
| "grad_norm_var": 0.027045025165358005, |
| "learning_rate": 0.1, |
| "loss": 2.6079, |
| "loss/crossentropy": 2.382603168487549, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22526073455810547, |
| "loss/reg": 0.15194866061210632, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.00942, |
| "grad_norm": 1.6385562419891357, |
| "grad_norm_var": 0.025275222037608576, |
| "learning_rate": 0.1, |
| "loss": 2.5931, |
| "loss/crossentropy": 2.385246753692627, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20786648988723755, |
| "loss/reg": 0.15204212069511414, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.00943, |
| "grad_norm": 1.6411834955215454, |
| "grad_norm_var": 0.02712776239141584, |
| "learning_rate": 0.1, |
| "loss": 2.5944, |
| "loss/crossentropy": 2.3896472454071045, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2047324925661087, |
| "loss/reg": 0.15192319452762604, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.00944, |
| "grad_norm": 2.2787599563598633, |
| "grad_norm_var": 0.03263780971972583, |
| "learning_rate": 0.1, |
| "loss": 3.629, |
| "loss/crossentropy": 3.4106805324554443, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21832598745822906, |
| "loss/reg": 0.15213294327259064, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.00945, |
| "grad_norm": 1.6873010396957397, |
| "grad_norm_var": 0.033632557354250456, |
| "learning_rate": 0.1, |
| "loss": 2.6945, |
| "loss/crossentropy": 2.4706761837005615, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22385725378990173, |
| "loss/reg": 0.1520669162273407, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.00946, |
| "grad_norm": 1.9756712913513184, |
| "grad_norm_var": 0.03499856731846549, |
| "learning_rate": 0.1, |
| "loss": 2.794, |
| "loss/crossentropy": 2.5755763053894043, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21842074394226074, |
| "loss/reg": 0.15193140506744385, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.00947, |
| "grad_norm": 1.8527393341064453, |
| "grad_norm_var": 0.031062658755957706, |
| "learning_rate": 0.1, |
| "loss": 2.8215, |
| "loss/crossentropy": 2.6194565296173096, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20199859142303467, |
| "loss/reg": 0.15207552909851074, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.00948, |
| "grad_norm": 1.674757957458496, |
| "grad_norm_var": 0.032438292935995365, |
| "learning_rate": 0.1, |
| "loss": 2.6683, |
| "loss/crossentropy": 2.4555649757385254, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2127261757850647, |
| "loss/reg": 0.15201851725578308, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.00949, |
| "grad_norm": 1.9119518995285034, |
| "grad_norm_var": 0.030899767857665416, |
| "learning_rate": 0.1, |
| "loss": 2.5786, |
| "loss/crossentropy": 2.3483386039733887, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23023086786270142, |
| "loss/reg": 0.15204684436321259, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.0095, |
| "grad_norm": 1.7950687408447266, |
| "grad_norm_var": 0.03099935027051964, |
| "learning_rate": 0.1, |
| "loss": 2.4949, |
| "loss/crossentropy": 2.2699592113494873, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2249831259250641, |
| "loss/reg": 0.15197406709194183, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.00951, |
| "grad_norm": 2.099536895751953, |
| "grad_norm_var": 0.03207046794753546, |
| "learning_rate": 0.1, |
| "loss": 2.7914, |
| "loss/crossentropy": 2.5401554107666016, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2512205243110657, |
| "loss/reg": 0.15196606516838074, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.00952, |
| "grad_norm": 1.9187355041503906, |
| "grad_norm_var": 0.03176235654916392, |
| "learning_rate": 0.1, |
| "loss": 2.8771, |
| "loss/crossentropy": 2.6496472358703613, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22747257351875305, |
| "loss/reg": 0.15210643410682678, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.00953, |
| "grad_norm": 2.0391948223114014, |
| "grad_norm_var": 0.03296130803944657, |
| "learning_rate": 0.1, |
| "loss": 2.7811, |
| "loss/crossentropy": 2.540827751159668, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24023352563381195, |
| "loss/reg": 0.15205425024032593, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.00954, |
| "grad_norm": 1.9917867183685303, |
| "grad_norm_var": 0.03326211961624489, |
| "learning_rate": 0.1, |
| "loss": 2.5003, |
| "loss/crossentropy": 2.2541849613189697, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24608798325061798, |
| "loss/reg": 0.1521024852991104, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.00955, |
| "grad_norm": 1.6118721961975098, |
| "grad_norm_var": 0.037107500335859335, |
| "learning_rate": 0.1, |
| "loss": 2.5861, |
| "loss/crossentropy": 2.3877437114715576, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19836172461509705, |
| "loss/reg": 0.15207068622112274, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.00956, |
| "grad_norm": 2.2274155616760254, |
| "grad_norm_var": 0.04486406543868782, |
| "learning_rate": 0.1, |
| "loss": 2.6224, |
| "loss/crossentropy": 2.389625310897827, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23273301124572754, |
| "loss/reg": 0.15192876756191254, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.00957, |
| "grad_norm": 1.6840041875839233, |
| "grad_norm_var": 0.04600670311299382, |
| "learning_rate": 0.1, |
| "loss": 2.7703, |
| "loss/crossentropy": 2.5624279975891113, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20791786909103394, |
| "loss/reg": 0.15210209786891937, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.00958, |
| "grad_norm": 1.5767619609832764, |
| "grad_norm_var": 0.04820817230487743, |
| "learning_rate": 0.1, |
| "loss": 2.5739, |
| "loss/crossentropy": 2.369368076324463, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2045065015554428, |
| "loss/reg": 0.15214532613754272, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.00959, |
| "grad_norm": 1.5548317432403564, |
| "grad_norm_var": 0.051342340654793324, |
| "learning_rate": 0.1, |
| "loss": 2.6556, |
| "loss/crossentropy": 2.3910350799560547, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2645493447780609, |
| "loss/reg": 0.1521218866109848, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 1.5721207857131958, |
| "grad_norm_var": 0.04380499918990092, |
| "learning_rate": 0.1, |
| "loss": 2.8639, |
| "loss/crossentropy": 2.679495096206665, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18439072370529175, |
| "loss/reg": 0.15199697017669678, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.00961, |
| "grad_norm": 1.6949290037155151, |
| "grad_norm_var": 0.04367025601913852, |
| "learning_rate": 0.1, |
| "loss": 2.7364, |
| "loss/crossentropy": 2.520474672317505, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21596890687942505, |
| "loss/reg": 0.1521150767803192, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.00962, |
| "grad_norm": 1.7289111614227295, |
| "grad_norm_var": 0.04248033531650795, |
| "learning_rate": 0.1, |
| "loss": 2.7928, |
| "loss/crossentropy": 2.578625440597534, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21414420008659363, |
| "loss/reg": 0.1520446538925171, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.00963, |
| "grad_norm": 1.8694390058517456, |
| "grad_norm_var": 0.042596461829378464, |
| "learning_rate": 0.1, |
| "loss": 2.7314, |
| "loss/crossentropy": 2.5003855228424072, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2310074269771576, |
| "loss/reg": 0.15215134620666504, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.00964, |
| "grad_norm": 1.675666093826294, |
| "grad_norm_var": 0.042580203314122815, |
| "learning_rate": 0.1, |
| "loss": 2.6102, |
| "loss/crossentropy": 2.366222620010376, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24394306540489197, |
| "loss/reg": 0.15221883356571198, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.00965, |
| "grad_norm": 1.69108247756958, |
| "grad_norm_var": 0.04261244390599449, |
| "learning_rate": 0.1, |
| "loss": 2.8257, |
| "loss/crossentropy": 2.613560676574707, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21218189597129822, |
| "loss/reg": 0.15201933681964874, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.00966, |
| "grad_norm": 1.7221252918243408, |
| "grad_norm_var": 0.04295122542294602, |
| "learning_rate": 0.1, |
| "loss": 2.5082, |
| "loss/crossentropy": 2.2699313163757324, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23822391033172607, |
| "loss/reg": 0.15223154425621033, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.00967, |
| "grad_norm": 1.7041839361190796, |
| "grad_norm_var": 0.0364640443460719, |
| "learning_rate": 0.1, |
| "loss": 2.6471, |
| "loss/crossentropy": 2.414999485015869, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23212993144989014, |
| "loss/reg": 0.15211285650730133, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.00968, |
| "grad_norm": 1.8748183250427246, |
| "grad_norm_var": 0.035692811553216984, |
| "learning_rate": 0.1, |
| "loss": 2.9063, |
| "loss/crossentropy": 2.6657252311706543, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2405734807252884, |
| "loss/reg": 0.15213453769683838, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.00969, |
| "grad_norm": 1.8031322956085205, |
| "grad_norm_var": 0.03050434422646801, |
| "learning_rate": 0.1, |
| "loss": 2.5427, |
| "loss/crossentropy": 2.3174121379852295, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22524681687355042, |
| "loss/reg": 0.15223067998886108, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.0097, |
| "grad_norm": 1.5883830785751343, |
| "grad_norm_var": 0.02761335388845983, |
| "learning_rate": 0.1, |
| "loss": 2.7745, |
| "loss/crossentropy": 2.5567333698272705, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21776434779167175, |
| "loss/reg": 0.15216635167598724, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.00971, |
| "grad_norm": 1.7091857194900513, |
| "grad_norm_var": 0.026753856083559846, |
| "learning_rate": 0.1, |
| "loss": 2.5377, |
| "loss/crossentropy": 2.3131892681121826, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22454933822155, |
| "loss/reg": 0.15221859514713287, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.00972, |
| "grad_norm": 1.6447182893753052, |
| "grad_norm_var": 0.009314557959134835, |
| "learning_rate": 0.1, |
| "loss": 2.8219, |
| "loss/crossentropy": 2.6119890213012695, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20989596843719482, |
| "loss/reg": 0.1521187722682953, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.00973, |
| "grad_norm": 1.521199345588684, |
| "grad_norm_var": 0.011174959644570019, |
| "learning_rate": 0.1, |
| "loss": 2.5971, |
| "loss/crossentropy": 2.3898251056671143, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2072967141866684, |
| "loss/reg": 0.15229454636573792, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.00974, |
| "grad_norm": 1.982746958732605, |
| "grad_norm_var": 0.015713839254212717, |
| "learning_rate": 0.1, |
| "loss": 2.5385, |
| "loss/crossentropy": 2.297492265701294, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24098548293113708, |
| "loss/reg": 0.152153879404068, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.00975, |
| "grad_norm": 1.731439232826233, |
| "grad_norm_var": 0.014042529806266657, |
| "learning_rate": 0.1, |
| "loss": 2.7375, |
| "loss/crossentropy": 2.509065866470337, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2284477800130844, |
| "loss/reg": 0.15217019617557526, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.00976, |
| "grad_norm": 1.769273042678833, |
| "grad_norm_var": 0.012594271501907823, |
| "learning_rate": 0.1, |
| "loss": 2.6801, |
| "loss/crossentropy": 2.4693803787231445, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21069443225860596, |
| "loss/reg": 0.15223300457000732, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.00977, |
| "grad_norm": 1.7092992067337036, |
| "grad_norm_var": 0.012536240709094612, |
| "learning_rate": 0.1, |
| "loss": 2.7206, |
| "loss/crossentropy": 2.490663766860962, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22993411123752594, |
| "loss/reg": 0.15214157104492188, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.00978, |
| "grad_norm": 1.7134039402008057, |
| "grad_norm_var": 0.012559414834582998, |
| "learning_rate": 0.1, |
| "loss": 2.5615, |
| "loss/crossentropy": 2.342837333679199, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21865780651569366, |
| "loss/reg": 0.15224945545196533, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.00979, |
| "grad_norm": 1.8809362649917603, |
| "grad_norm_var": 0.012778548502409079, |
| "learning_rate": 0.1, |
| "loss": 2.8899, |
| "loss/crossentropy": 2.6709282398223877, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2189548909664154, |
| "loss/reg": 0.152319073677063, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.0098, |
| "grad_norm": 1.9110846519470215, |
| "grad_norm_var": 0.014455323436797526, |
| "learning_rate": 0.1, |
| "loss": 2.4073, |
| "loss/crossentropy": 2.1703274250030518, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23699511587619781, |
| "loss/reg": 0.15227392315864563, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.00981, |
| "grad_norm": 1.5887112617492676, |
| "grad_norm_var": 0.015877836771995568, |
| "learning_rate": 0.1, |
| "loss": 2.6137, |
| "loss/crossentropy": 2.3956210613250732, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21806836128234863, |
| "loss/reg": 0.1522170901298523, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.00982, |
| "grad_norm": 1.545395851135254, |
| "grad_norm_var": 0.018272678243892376, |
| "learning_rate": 0.1, |
| "loss": 2.6177, |
| "loss/crossentropy": 2.4133400917053223, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2043379545211792, |
| "loss/reg": 0.15220846235752106, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.00983, |
| "grad_norm": 1.6206599473953247, |
| "grad_norm_var": 0.01899474196982324, |
| "learning_rate": 0.1, |
| "loss": 2.604, |
| "loss/crossentropy": 2.3965511322021484, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20746548473834991, |
| "loss/reg": 0.1522972583770752, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.00984, |
| "grad_norm": 1.6314990520477295, |
| "grad_norm_var": 0.01782313687711549, |
| "learning_rate": 0.1, |
| "loss": 2.602, |
| "loss/crossentropy": 2.3807995319366455, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22123152017593384, |
| "loss/reg": 0.15230204164981842, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.00985, |
| "grad_norm": 1.6598889827728271, |
| "grad_norm_var": 0.017316146575559264, |
| "learning_rate": 0.1, |
| "loss": 3.0089, |
| "loss/crossentropy": 2.790135383605957, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21872150897979736, |
| "loss/reg": 0.1522573083639145, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.00986, |
| "grad_norm": 1.698798656463623, |
| "grad_norm_var": 0.016427688792159025, |
| "learning_rate": 0.1, |
| "loss": 2.8418, |
| "loss/crossentropy": 2.6150970458984375, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22669515013694763, |
| "loss/reg": 0.1524110585451126, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.00987, |
| "grad_norm": 1.9350906610488892, |
| "grad_norm_var": 0.019671341449357508, |
| "learning_rate": 0.1, |
| "loss": 2.622, |
| "loss/crossentropy": 2.391024589538574, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23092934489250183, |
| "loss/reg": 0.1523534059524536, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.00988, |
| "grad_norm": 1.5841007232666016, |
| "grad_norm_var": 0.02052164650872938, |
| "learning_rate": 0.1, |
| "loss": 2.6656, |
| "loss/crossentropy": 2.4377593994140625, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2278522402048111, |
| "loss/reg": 0.15228508412837982, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.00989, |
| "grad_norm": 2.7904202938079834, |
| "grad_norm_var": 0.08794709350955993, |
| "learning_rate": 0.1, |
| "loss": 2.8781, |
| "loss/crossentropy": 2.5933585166931152, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.28478509187698364, |
| "loss/reg": 0.15240223705768585, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.0099, |
| "grad_norm": 1.8948380947113037, |
| "grad_norm_var": 0.08625346614631055, |
| "learning_rate": 0.1, |
| "loss": 2.4918, |
| "loss/crossentropy": 2.258913993835449, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23290902376174927, |
| "loss/reg": 0.15240952372550964, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.00991, |
| "grad_norm": 1.6940659284591675, |
| "grad_norm_var": 0.08664031470375301, |
| "learning_rate": 0.1, |
| "loss": 2.57, |
| "loss/crossentropy": 2.3558802604675293, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2140781581401825, |
| "loss/reg": 0.15233653783798218, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.00992, |
| "grad_norm": 1.633371114730835, |
| "grad_norm_var": 0.08815603155189435, |
| "learning_rate": 0.1, |
| "loss": 2.7131, |
| "loss/crossentropy": 2.5057554244995117, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20731468498706818, |
| "loss/reg": 0.15228934586048126, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.00993, |
| "grad_norm": 1.6418057680130005, |
| "grad_norm_var": 0.08908349157937506, |
| "learning_rate": 0.1, |
| "loss": 2.5969, |
| "loss/crossentropy": 2.3828420639038086, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2140577733516693, |
| "loss/reg": 0.15232902765274048, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.00994, |
| "grad_norm": 1.8615351915359497, |
| "grad_norm_var": 0.08920863315119215, |
| "learning_rate": 0.1, |
| "loss": 2.8429, |
| "loss/crossentropy": 2.594074249267578, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.24883317947387695, |
| "loss/reg": 0.1523798108100891, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.00995, |
| "grad_norm": 1.783341646194458, |
| "grad_norm_var": 0.0885654698482184, |
| "learning_rate": 0.1, |
| "loss": 2.6152, |
| "loss/crossentropy": 2.397364377975464, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21784093976020813, |
| "loss/reg": 0.15237665176391602, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.00996, |
| "grad_norm": 1.713290810585022, |
| "grad_norm_var": 0.0875447006733765, |
| "learning_rate": 0.1, |
| "loss": 2.7776, |
| "loss/crossentropy": 2.554166793823242, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22345495223999023, |
| "loss/reg": 0.15242858231067657, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.00997, |
| "grad_norm": 1.8568384647369385, |
| "grad_norm_var": 0.08565333188954369, |
| "learning_rate": 0.1, |
| "loss": 2.7547, |
| "loss/crossentropy": 2.5045394897460938, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2501196265220642, |
| "loss/reg": 0.15244552493095398, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.00998, |
| "grad_norm": 1.7091144323349, |
| "grad_norm_var": 0.08211875930964958, |
| "learning_rate": 0.1, |
| "loss": 2.777, |
| "loss/crossentropy": 2.565586566925049, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21137075126171112, |
| "loss/reg": 0.1524423360824585, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.00999, |
| "grad_norm": 1.8013511896133423, |
| "grad_norm_var": 0.07997618837147887, |
| "learning_rate": 0.1, |
| "loss": 2.591, |
| "loss/crossentropy": 2.392230987548828, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19876420497894287, |
| "loss/reg": 0.15232834219932556, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 1.638023853302002, |
| "grad_norm_var": 0.07982739951320064, |
| "learning_rate": 0.1, |
| "loss": 2.5326, |
| "loss/crossentropy": 2.3327131271362305, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19983986020088196, |
| "loss/reg": 0.1524507850408554, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 100000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": true, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.28811723128832e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |