diff --git "a/weights/checkpoint_epoch_15_metadata.json" "b/weights/checkpoint_epoch_15_metadata.json" --- "a/weights/checkpoint_epoch_15_metadata.json" +++ "b/weights/checkpoint_epoch_15_metadata.json" @@ -3,250 +3,225 @@ "optimizer_state_dict": { "state": { "0": { - "step": "tensor(7512.)", - "exp_avg": "tensor([[-5.9878e-05, 1.5145e-04, -8.0526e-06, ..., -2.3528e-05,\n -8.7828e-06, -1.7757e-05],\n [ 2.1625e-05, -5.5180e-06, -6.7915e-06, ..., 4.0538e-05,\n -3.8513e-05, -2.6659e-05],\n [-2.6284e-05, 6.7238e-05, -4.8466e-05, ..., 2.9354e-05,\n -1.4989e-05, -3.1615e-05],\n ...,\n [ 4.0357e-05, 9.3667e-06, -3.2310e-05, ..., 2.0847e-05,\n 3.6688e-06, 8.8120e-07],\n [-1.1356e-14, 4.9165e-15, 1.1260e-14, ..., 6.7834e-15,\n 4.8789e-15, -1.7235e-14],\n [-6.6361e-05, 5.0332e-05, -1.1280e-05, ..., -1.4861e-05,\n 4.2881e-05, 2.7634e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.8155e-08, 6.0153e-08, 7.2310e-09, ..., 1.7531e-08, 1.6602e-08,\n 4.3246e-09],\n [1.1362e-07, 7.6477e-08, 2.8919e-08, ..., 5.1424e-08, 2.4834e-08,\n 3.8752e-08],\n [6.0382e-08, 8.7293e-08, 2.0518e-08, ..., 2.3148e-08, 1.5363e-08,\n 1.9589e-08],\n ...,\n [1.5971e-07, 9.2553e-08, 1.8061e-08, ..., 2.2711e-08, 1.7362e-08,\n 1.1314e-08],\n [1.0775e-11, 2.9446e-11, 4.9464e-12, ..., 9.0051e-12, 7.8304e-12,\n 6.5481e-12],\n [1.0519e-07, 7.4661e-08, 1.0791e-08, ..., 1.4872e-08, 2.4900e-08,\n 1.1094e-08]], device='cuda:0')" + "step": "tensor(18780.)", + "exp_avg": "tensor([[ 6.9046e-06, -1.4131e-05, 3.6608e-05, ..., -1.3263e-05,\n -1.7930e-05, -1.9217e-05],\n [ 1.0277e-05, -1.0210e-05, -3.9813e-05, ..., -6.7457e-06,\n -2.6719e-05, 9.3058e-06],\n [ 1.3385e-06, -8.3710e-07, -5.2132e-07, ..., 1.4385e-06,\n 2.0080e-06, -3.8964e-06],\n ...,\n [ 5.2516e-05, -1.2141e-05, -3.3344e-05, ..., 1.0439e-05,\n 2.1196e-06, 3.8359e-05],\n [ 1.7448e-05, 1.1857e-05, -1.3080e-05, ..., -4.4515e-05,\n -6.1304e-06, 2.8262e-05],\n [-1.4673e-06, -1.4217e-06, -1.6492e-05, ..., -5.5011e-06,\n 6.8216e-06, -6.4893e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.0127e-09, 1.0732e-08, 5.5719e-09, ..., 6.8149e-09, 5.7613e-09,\n 4.3411e-09],\n [8.6893e-09, 7.6218e-09, 9.0608e-09, ..., 6.8700e-09, 5.0676e-09,\n 4.4174e-09],\n [2.4400e-10, 2.2540e-10, 1.1126e-10, ..., 1.6418e-10, 1.6169e-10,\n 1.2518e-10],\n ...,\n [9.9051e-09, 7.7035e-09, 7.3666e-09, ..., 5.7963e-09, 5.5910e-09,\n 4.1583e-09],\n [1.2027e-08, 1.0075e-08, 7.8236e-09, ..., 8.9864e-09, 6.4781e-09,\n 5.4778e-09],\n [2.9772e-09, 5.1540e-09, 2.9757e-09, ..., 1.9636e-09, 2.3101e-09,\n 1.8583e-09]], device='cuda:0')" }, "1": { - "step": "tensor(7512.)", - "exp_avg": "tensor([-1.5547e-03, 2.0071e-03, 3.8861e-04, -1.6003e-03, -1.2329e-04,\n -5.0467e-05, -2.0337e-03, 2.4382e-04, 2.0350e-04, -2.1919e-05,\n 9.4562e-04, 4.2298e-04, 5.6052e-45, -3.5132e-04, 5.0270e-04,\n 9.9834e-04, 1.8121e-03, -6.9888e-04, 3.1928e-03, 5.6052e-45,\n -1.0468e-03, -1.2197e-03, 6.0330e-05, -4.6202e-03, 5.6052e-45,\n -1.1990e-03, -7.7044e-04, 1.9845e-03, -3.0753e-03, -9.7927e-04,\n -8.3084e-04, -1.5502e-03, 3.9647e-39, 5.6052e-45, 1.2096e-03,\n 1.2746e-03, 3.8038e-04, -3.0524e-04, 1.3418e-03, -3.0765e-04,\n -2.2403e-03, -4.2497e-04, 3.0549e-03, -5.8180e-04, 7.2136e-04,\n -7.7200e-04, 4.3062e-04, 2.4294e-04, -7.5414e-04, 9.3444e-04,\n 7.2593e-04, 5.6052e-45, 1.4848e-36, 5.6052e-45, 4.3734e-04,\n 2.5002e-03, 1.2859e-03, 5.1210e-04, 9.3318e-04, -2.3428e-03,\n 1.2630e-03, 3.5347e-03, -4.0320e-03, 5.1594e-04, 8.6356e-04,\n 8.7518e-04, 1.0819e-03, 5.6052e-45, 1.4315e-03, -3.6576e-03,\n 1.4177e-04, 5.6052e-45, 2.0659e-03, 2.4878e-03, 5.4197e-05,\n -7.9008e-04, 1.3947e-04, 2.1853e-03, -4.5762e-04, -2.2738e-03,\n -1.3668e-03, -3.8966e-03, 2.0002e-03, 2.1052e-04, -1.8271e-04,\n -1.5527e-03, 1.3430e-03, -1.0526e-03, -2.0740e-03, 2.9635e-04,\n 5.6052e-45, 1.9730e-03, 7.7154e-04, -2.9497e-03, 1.0967e-03,\n 2.3390e-04, -1.6306e-03, 1.8335e-03, 2.3571e-04, 7.4950e-04,\n 2.4118e-03, -1.2847e-05, 5.5689e-04, -1.9352e-03, 3.6302e-03,\n 1.5116e-03, 4.2673e-04, -1.7794e-03, -1.5053e-03, 1.0194e-03,\n 1.7510e-03, -7.3855e-04, 5.6052e-45, 1.4297e-03, -4.4048e-04,\n 2.3473e-03, 4.1661e-04, -8.0947e-04, 3.6507e-12, 7.2896e-13,\n 5.6052e-45, 1.2639e-03, 5.6052e-45, -1.9938e-03, 4.6707e-04,\n 4.4636e-03, 1.5068e-03, 5.6052e-45, 1.2132e-04, -7.4410e-04,\n 6.2267e-04, 2.3974e-03, -1.8230e-03, 7.5307e-08, -6.6941e-04,\n -7.8193e-04, 1.7270e-04, -2.8829e-04, 1.2755e-03, -1.1605e-03,\n -8.5571e-05, -2.1510e-03, -5.6207e-04, 1.5339e-03, -2.8894e-03,\n 5.1715e-06, -5.6052e-45, -1.1987e-04, -1.9933e-03, -6.6907e-04,\n 6.1965e-24, 2.0977e-04, 1.2725e-03, 5.6052e-45, 1.8509e-04,\n -4.2754e-04, 5.6052e-45, 2.1769e-03, 1.0384e-04, 1.6764e-03,\n 6.1372e-04, 4.7413e-04, 2.0586e-03, -3.0762e-03, 3.1514e-04,\n 1.4208e-03, 2.0734e-05, -9.7741e-04, 2.0391e-03, 1.6240e-14,\n 5.6052e-45, -1.8627e-03, -4.6027e-03, 4.1200e-04, 2.7234e-03,\n 1.7108e-03, -3.2127e-04, -9.4812e-04, 5.6052e-45, -3.9411e-03,\n 4.9644e-04, -3.6147e-03, 7.6441e-05, 5.6052e-45, 6.4152e-04,\n 1.4312e-03, -6.1238e-04, 4.7829e-04, 3.2941e-03, 5.6052e-45,\n 5.6052e-45, -1.2758e-03, -8.9139e-04, -3.5562e-04, 1.6980e-04,\n 7.5192e-04, -1.5328e-03, -1.0477e-03, 5.6052e-45, -4.7442e-04,\n -1.0503e-03, -2.0941e-04, -5.1279e-03, 8.2279e-05, 1.9237e-03,\n 5.6052e-45, -1.2216e-04, -1.4893e-03, 1.4088e-03, -1.7684e-04,\n 1.5857e-03, -7.5531e-06, 8.8005e-04, 1.2262e-03, -2.2247e-04,\n 1.1890e-03, 3.6135e-05, -1.3650e-03, 1.1019e-04, -6.6627e-04,\n 4.1960e-04, 2.8608e-03, 3.2458e-03, 4.4798e-04, 5.5125e-05,\n 8.9767e-04, -2.0677e-03, 3.0639e-03, -8.6294e-04, 3.7573e-03,\n -7.2673e-04, 1.3558e-03, 8.0360e-04, 5.6052e-45, -1.1249e-03,\n -1.0344e-03, 6.8387e-04, 5.2949e-04, -2.9708e-03, 3.1472e-03,\n 1.4993e-03, 7.5004e-04, -2.9475e-03, 5.6349e-04, 4.1170e-03,\n -1.9166e-03, -3.0506e-03, -1.4954e-03, 8.3250e-04, 1.7061e-04,\n 4.2885e-04, -9.7707e-04, 2.1697e-03, 1.3977e-04, 2.6650e-03,\n 7.0468e-05, 5.6052e-45, -6.6793e-04, 1.7716e-03, 6.5031e-04,\n -1.2921e-03, 9.2144e-04, 8.7300e-04, -2.5781e-04, 1.1980e-04,\n -1.3322e-04, -5.5072e-04, 5.6052e-45, -4.6666e-03, 9.9524e-04,\n -5.6931e-04, 4.4578e-04, -1.3028e-03, -3.9469e-05, 2.7959e-04,\n -3.4428e-37, 9.3859e-04, 5.6052e-45, 7.5188e-04, -7.9472e-04,\n 3.4993e-03, -1.2679e-03, 9.3277e-04, -2.2040e-03, 3.5127e-04,\n 4.9395e-04, 1.2180e-03, -3.1301e-03, -1.1774e-03, -1.2080e-03,\n -3.8436e-04, 2.8282e-04, 6.7124e-04, 1.4642e-03, 5.4226e-04,\n -1.4612e-04, -1.2069e-03, -2.0727e-03, 2.0539e-06, 9.1835e-04,\n -3.5770e-04, 3.6916e-04, -2.1207e-04, -1.9126e-03, 1.4378e-03,\n 6.6016e-04, 4.3976e-03, 5.6052e-45, 5.6052e-45, 8.1742e-04,\n 3.8007e-04, 2.2362e-03, 1.3500e-03, 2.1797e-03, -8.5256e-04,\n 1.3464e-04, -1.3407e-03, 7.8234e-04, -1.0598e-04, -1.2915e-03,\n 1.9882e-03, 2.4040e-04, 1.1723e-03, 2.8819e-03, -9.9809e-04,\n -8.5895e-04, -1.0056e-04, 4.0674e-04, 2.4588e-03, 1.9608e-04,\n -4.6726e-04, 2.4874e-03, 5.2214e-03, -3.3050e-03, 5.6052e-45,\n -6.1393e-03, 1.2381e-03, -2.0519e-03, -4.9399e-04, -2.0860e-04,\n 4.9380e-04, -7.3929e-04, -1.2120e-04, 1.3940e-03, 4.1112e-04,\n 1.6549e-03, 5.6052e-45, -1.0374e-03, 2.1756e-03, -5.4903e-04,\n 1.0074e-03, -7.2374e-04, 2.6860e-04, -3.0276e-05, 1.0967e-03,\n -1.9524e-03, -1.0634e-03, 9.6021e-04, -4.5806e-03, -9.3920e-04,\n -2.1870e-04, -5.6079e-04, 4.4154e-05, 7.0425e-04, 1.4709e-03,\n 4.2267e-03, -1.2981e-04, 1.3461e-04, 8.6668e-04, -2.7276e-03,\n 4.4787e-04, 1.3476e-03, -3.7332e-04, 1.4239e-03, 1.2915e-03,\n -7.0842e-04, -1.0320e-03, 3.7945e-04, 5.6052e-45, 3.8741e-04,\n 1.2879e-03, 5.6052e-45, -8.9241e-04, -1.3345e-03, -5.3103e-03,\n -2.2584e-03, -1.3471e-03, 1.1087e-03, -8.0159e-04, 2.2230e-03,\n -1.0370e-03, -4.7341e-04, 9.3248e-04, -2.2820e-34, 5.6052e-45,\n 9.5689e-04, 3.0996e-03, 1.0250e-03, 2.5287e-03, 6.3804e-04,\n -1.5270e-03, -2.7615e-04, -2.5267e-04, 6.7697e-04, 2.8193e-03,\n -1.2876e-04, -1.2888e-03, 5.6052e-45, 2.7561e-03, 1.4491e-05,\n 5.6052e-45, 2.1475e-03, 5.6052e-45, 8.4788e-04, -4.7463e-03,\n 1.3473e-03, 8.9887e-04, 5.6052e-45, 1.4217e-03, -4.9090e-03,\n 3.0088e-03, -6.5861e-04, -5.3771e-03, 8.3741e-04, -3.4183e-03,\n 2.4886e-04, 9.8072e-04, 4.5916e-04, -1.0990e-03, 5.6052e-45,\n -5.1696e-04, 4.8440e-04, -4.5510e-03, 5.6052e-45, 1.9971e-03,\n -6.5871e-04, -5.4696e-04, 4.4145e-04, -1.6760e-03, -8.5020e-04,\n -6.2415e-04, -1.8507e-03, 1.4622e-03, -3.2481e-03, -9.2086e-04,\n 3.2666e-04, 8.0873e-04, 5.6052e-45, 2.6666e-04, -2.9023e-03,\n 5.6052e-45, -9.8574e-04, -2.7962e-04, 1.5615e-04, -2.9323e-03,\n 6.7231e-12, 1.8411e-03, -8.3307e-04, 7.1975e-04, 5.6052e-45,\n -9.5483e-05, -7.4895e-04, 5.6052e-45, -1.6758e-04, -5.7952e-04,\n -1.2591e-03, -2.7694e-03, 5.6052e-45, -1.0466e-03, 3.6761e-04,\n -1.5731e-03, 7.2439e-04, -4.0236e-04, 3.7771e-05, 9.5313e-04,\n 2.3660e-03, 5.6052e-45, -1.0273e-03, 5.6052e-45, -1.0929e-03,\n 8.3823e-04, 3.7339e-04, -8.0509e-05, 4.7963e-05, 1.4250e-03,\n -1.1593e-03, -1.6976e-03, -5.5229e-05, -1.5914e-03, -4.2776e-04,\n 5.6052e-45, 2.3358e-04, -4.5627e-03, -3.2662e-03, 1.2686e-04,\n -1.5013e-04, 2.3623e-03, 2.2785e-03, -1.3989e-03, 6.9224e-04,\n -1.1806e-03, 5.6052e-45, 6.8883e-04, -9.5046e-04, -1.2031e-03,\n 5.7518e-05, -8.3869e-03, -2.7011e-03, -1.3202e-03, 5.1939e-04,\n 2.8636e-03, -4.4298e-03, -1.0171e-03, 9.2249e-04, 5.6052e-45,\n 5.6052e-45, -1.1843e-03, 3.6278e-03, -1.4213e-03, 1.7748e-03,\n -2.0510e-03, -1.4381e-03, -1.1727e-03, -1.0068e-03, -5.6593e-04,\n -9.5728e-04, 1.5139e-04, -2.0254e-03, -1.4656e-04, 3.4529e-04,\n 2.1844e-04, -2.2310e-03, 6.5388e-04, 7.3588e-05, 6.4380e-27,\n -2.3054e-03, 1.0189e-03, 5.6052e-45, 5.6052e-45, 9.9006e-04,\n 4.6500e-04, 7.0753e-04, 2.0556e-03, -2.6589e-04, 8.4278e-04,\n 1.4235e-03, -3.4427e-03, 1.2201e-03, -3.8728e-04, 5.5105e-04,\n 1.2138e-03, -2.1050e-03, 1.6906e-03, 2.1028e-03, 6.4352e-04,\n 2.5902e-03, 1.4057e-03, -1.7836e-03, -1.3276e-03, 6.4423e-04,\n 5.5246e-04, -7.6548e-04, -1.1630e-03, -3.0730e-03, 5.2290e-04,\n -2.6136e-04, 2.2622e-04, 1.2981e-03, 7.0619e-04, 8.2525e-04,\n -1.7701e-04, 7.7137e-06, -1.8519e-03, -1.2987e-03, -4.4375e-06,\n 2.4185e-05, 5.6052e-45, -1.1175e-03, 2.8010e-03, -1.5779e-03,\n 8.6225e-04, 5.6052e-45, -1.1465e-03, 7.8957e-04, -7.3214e-04,\n -1.3058e-03, -4.9303e-03, 1.0742e-03, 5.4710e-03, -1.8958e-03,\n 5.6857e-05, -4.5310e-05, 1.5966e-03, 1.0438e-03, -1.7156e-03,\n -1.3517e-03, -1.5308e-03, -2.0269e-03, 2.7084e-03, 1.0559e-05,\n 9.8418e-04, -1.2293e-03, 1.4061e-04, 1.8254e-03, 6.6775e-04,\n -2.9655e-04, 4.3551e-03, 1.7863e-03, -1.8031e-03, -2.1927e-03,\n -1.4684e-03, -3.4495e-04, 1.4723e-04, 2.7523e-04, 4.4844e-04,\n -2.5797e-03, 2.1401e-03, -4.7214e-04, -1.5152e-03, 1.3795e-17,\n 9.1120e-13, -1.4675e-03, 2.3385e-03, -3.2316e-04, 2.5272e-04,\n -8.7744e-04, 7.5234e-05, -5.3239e-04, -4.9507e-04, 2.0739e-03,\n 3.0889e-04, -2.0431e-11, -1.6454e-03, -5.9457e-04, 7.9547e-04,\n -4.1667e-03, -4.7809e-04, 1.6211e-04, 2.9867e-13, -4.0283e-05],\n device='cuda:0')", - "exp_avg_sq": "tensor([1.3021e-05, 5.3796e-05, 2.3192e-05, 1.9576e-05, 1.3870e-05, 4.6743e-06,\n 3.5283e-05, 3.1518e-05, 2.3867e-05, 2.9973e-05, 2.9902e-05, 6.0132e-05,\n 1.2312e-08, 2.8891e-05, 3.1634e-05, 3.4801e-05, 5.2221e-05, 3.4915e-05,\n 2.3299e-05, 3.2055e-09, 2.8631e-05, 1.9722e-05, 2.1528e-05, 3.5841e-05,\n 1.1701e-08, 4.2761e-05, 2.9477e-05, 4.1802e-05, 3.7231e-05, 3.7278e-05,\n 4.3057e-05, 3.2981e-05, 4.1089e-08, 2.0666e-09, 3.4348e-05, 2.8126e-05,\n 3.5782e-05, 2.0518e-05, 2.5256e-05, 3.3177e-05, 3.5363e-05, 2.2497e-05,\n 3.1784e-05, 2.8190e-05, 3.3807e-05, 3.4928e-05, 2.9906e-05, 5.0382e-05,\n 5.8548e-05, 2.6628e-05, 3.6741e-05, 1.2275e-09, 1.9919e-09, 1.8641e-09,\n 4.3087e-05, 6.2912e-05, 2.4602e-05, 2.5460e-05, 3.0583e-06, 3.8701e-05,\n 3.2290e-05, 3.2015e-05, 6.3771e-05, 1.6302e-05, 3.8337e-05, 3.6245e-05,\n 3.1571e-05, 2.2816e-09, 4.9863e-05, 3.5150e-05, 1.2132e-05, 9.3199e-09,\n 3.9410e-05, 2.9904e-05, 1.3435e-05, 4.0259e-05, 4.2287e-05, 4.6600e-05,\n 2.4750e-05, 4.2024e-05, 5.6549e-05, 3.0509e-05, 4.1087e-05, 2.6102e-05,\n 3.1179e-05, 3.3890e-05, 3.0759e-05, 1.9892e-05, 3.3902e-05, 3.4761e-05,\n 1.4549e-08, 4.2006e-05, 2.9632e-05, 2.5834e-05, 5.0605e-05, 1.4532e-05,\n 3.7315e-05, 3.2166e-05, 2.9248e-05, 2.3110e-05, 2.4368e-05, 1.1931e-05,\n 4.6217e-05, 4.6762e-05, 7.6658e-05, 2.5875e-05, 2.9145e-05, 5.0365e-05,\n 1.5352e-05, 2.0027e-05, 4.8282e-05, 4.5413e-05, 9.8368e-10, 2.3563e-05,\n 1.0647e-05, 2.4692e-05, 3.6042e-05, 5.6803e-05, 7.9077e-09, 1.0627e-07,\n 1.5955e-09, 3.0607e-05, 1.2704e-09, 6.2054e-05, 3.1091e-05, 4.3240e-05,\n 4.3617e-05, 2.3898e-10, 1.4798e-05, 3.6089e-05, 3.0454e-05, 3.6172e-05,\n 3.8344e-05, 4.2634e-10, 3.7936e-05, 3.4436e-05, 2.2107e-05, 2.7341e-05,\n 3.2417e-05, 2.6234e-05, 5.8949e-05, 3.3497e-05, 4.8785e-05, 3.3006e-05,\n 3.6422e-05, 3.8835e-05, 1.8814e-08, 3.6080e-05, 3.3121e-05, 2.7388e-05,\n 1.6154e-08, 2.0383e-05, 2.9089e-05, 8.6575e-09, 2.8277e-05, 4.2450e-05,\n 1.7150e-08, 4.9663e-05, 2.8028e-05, 3.6365e-05, 2.6683e-05, 3.3160e-05,\n 9.1455e-05, 3.4862e-05, 3.2651e-05, 2.6349e-05, 5.2580e-05, 2.0083e-05,\n 3.4769e-05, 2.0374e-09, 3.1397e-09, 3.1540e-05, 5.6469e-05, 2.4352e-05,\n 3.0787e-05, 4.6977e-05, 1.4707e-05, 3.9183e-05, 5.6981e-10, 1.9528e-05,\n 3.4749e-05, 3.8754e-05, 8.1521e-06, 4.1926e-09, 3.2684e-05, 3.4385e-05,\n 5.2406e-05, 2.1556e-05, 2.9397e-05, 1.2175e-09, 1.3949e-08, 3.0815e-05,\n 1.5989e-05, 4.0221e-05, 3.0197e-05, 3.4250e-05, 2.0810e-05, 3.7347e-05,\n 2.1987e-09, 2.9400e-05, 2.4720e-05, 1.5223e-05, 4.8300e-05, 1.8443e-05,\n 6.6025e-05, 1.0844e-10, 2.6224e-05, 3.2083e-05, 2.6022e-05, 2.9667e-05,\n 1.7494e-05, 3.5440e-05, 3.9505e-05, 3.2366e-05, 2.4208e-05, 2.7164e-05,\n 2.3098e-05, 3.9995e-05, 3.4779e-05, 2.2863e-05, 2.0063e-05, 3.4085e-05,\n 2.4757e-05, 2.8124e-05, 3.2091e-05, 3.5307e-05, 4.7019e-05, 5.0956e-05,\n 1.9995e-05, 4.4444e-05, 3.8122e-05, 6.3308e-05, 3.8108e-05, 2.6956e-09,\n 3.6300e-05, 1.7641e-05, 2.7193e-05, 1.9847e-05, 7.6086e-05, 3.4214e-05,\n 2.9563e-05, 3.6952e-05, 3.4829e-05, 2.7645e-05, 4.6009e-05, 2.1137e-05,\n 1.9417e-05, 4.6554e-05, 2.1130e-05, 2.2815e-05, 3.5201e-05, 2.8074e-05,\n 2.6856e-05, 2.9291e-05, 3.4602e-05, 2.5958e-05, 1.1615e-08, 2.5153e-05,\n 2.2546e-05, 1.3961e-05, 3.4100e-05, 4.4989e-05, 3.9747e-05, 3.2497e-05,\n 3.3854e-05, 2.7848e-05, 2.6141e-05, 1.7217e-09, 4.4194e-05, 3.4883e-05,\n 2.4571e-05, 4.7906e-05, 2.5940e-05, 2.2224e-05, 3.9696e-05, 3.0047e-09,\n 3.2895e-05, 8.4901e-09, 2.3303e-05, 1.5102e-05, 2.9813e-05, 2.7634e-05,\n 4.5131e-05, 3.8425e-05, 3.1950e-05, 3.9720e-05, 4.8931e-06, 5.1211e-05,\n 4.3416e-05, 2.2144e-05, 8.6966e-06, 3.3139e-05, 4.2042e-05, 3.5721e-05,\n 2.8013e-05, 6.0523e-05, 6.1655e-05, 3.3952e-05, 3.1232e-05, 6.8186e-05,\n 3.3724e-05, 8.9881e-06, 8.5592e-06, 2.6892e-05, 3.2744e-05, 1.3551e-05,\n 3.6961e-05, 3.5738e-10, 7.0405e-09, 3.4512e-05, 4.8325e-05, 2.1467e-05,\n 3.0108e-05, 4.1228e-05, 4.5396e-05, 2.9115e-05, 3.8819e-05, 2.4588e-05,\n 2.3402e-05, 3.8760e-05, 4.6794e-05, 5.1113e-05, 4.1906e-05, 2.0439e-05,\n 1.3448e-05, 2.9455e-05, 2.9132e-05, 1.0425e-05, 2.7550e-05, 3.6300e-05,\n 9.8941e-06, 2.1586e-05, 3.8315e-05, 5.1747e-05, 8.3993e-13, 3.4223e-05,\n 2.2947e-05, 4.7360e-05, 1.9721e-05, 3.3140e-05, 7.7601e-06, 3.0554e-05,\n 2.5828e-05, 2.7024e-05, 2.7939e-05, 4.3084e-05, 1.2292e-08, 1.0357e-05,\n 3.4917e-05, 3.4287e-05, 2.8434e-05, 3.7022e-05, 2.6428e-05, 2.8519e-05,\n 5.3335e-05, 3.4349e-05, 3.2266e-05, 3.0315e-05, 4.2417e-05, 9.8085e-06,\n 1.6924e-05, 2.0284e-05, 4.2453e-05, 5.2236e-05, 3.7341e-05, 8.1510e-05,\n 1.5209e-05, 4.1039e-05, 4.4343e-05, 3.4439e-05, 3.9659e-05, 2.2527e-05,\n 2.7930e-05, 9.7808e-06, 2.8062e-05, 5.0856e-05, 7.0425e-05, 1.1067e-05,\n 5.3510e-09, 3.5086e-05, 2.3238e-05, 2.9196e-08, 2.6405e-05, 5.8847e-05,\n 3.4002e-05, 2.7950e-05, 3.3519e-05, 3.7437e-05, 2.1100e-05, 2.0714e-05,\n 4.5788e-05, 4.2879e-05, 2.9109e-05, 2.9099e-09, 9.6286e-09, 3.0943e-05,\n 3.5061e-05, 3.2220e-05, 5.3933e-05, 3.0235e-05, 3.3810e-05, 2.0114e-05,\n 1.3045e-05, 2.4553e-05, 3.1477e-05, 4.6090e-05, 2.4217e-05, 3.5459e-09,\n 1.4374e-05, 1.0064e-05, 1.0215e-09, 1.2434e-05, 1.7821e-09, 4.3505e-05,\n 4.1417e-05, 1.5926e-05, 2.9558e-05, 4.8114e-09, 1.7379e-05, 4.2585e-05,\n 2.4789e-05, 3.8084e-05, 4.4486e-05, 4.2984e-05, 3.2733e-05, 4.9512e-06,\n 2.9949e-05, 9.1057e-06, 2.0809e-05, 2.4184e-09, 2.0992e-05, 4.1097e-05,\n 3.7315e-05, 3.0906e-09, 2.0551e-05, 2.9652e-05, 4.5617e-05, 3.5368e-05,\n 3.0223e-05, 2.5539e-05, 5.0838e-05, 1.7679e-05, 3.1876e-05, 4.9207e-05,\n 2.6215e-05, 1.1404e-05, 3.4683e-05, 1.0431e-09, 2.7868e-05, 2.3964e-05,\n 3.4609e-08, 2.3299e-05, 2.7027e-05, 6.1198e-06, 3.1471e-05, 1.1836e-08,\n 1.9667e-05, 3.7060e-06, 2.9034e-05, 4.6722e-08, 3.0893e-05, 5.2606e-05,\n 3.0377e-10, 2.6803e-05, 8.8889e-06, 3.3936e-05, 2.7664e-05, 2.4809e-09,\n 4.2794e-05, 2.0791e-05, 5.4504e-05, 4.1544e-05, 2.4931e-05, 1.6742e-05,\n 2.1599e-05, 2.8099e-05, 1.7244e-08, 3.8987e-05, 5.4751e-09, 9.1970e-06,\n 7.5787e-06, 9.8713e-06, 3.3283e-05, 3.3202e-05, 6.5534e-05, 2.2811e-05,\n 4.0472e-05, 2.4975e-05, 3.8881e-05, 2.6558e-05, 2.2972e-09, 3.0746e-05,\n 3.6462e-05, 6.2818e-05, 6.9899e-06, 1.9340e-05, 2.6103e-05, 2.1280e-05,\n 3.5423e-05, 1.6750e-05, 2.8258e-05, 1.7046e-08, 2.1112e-05, 4.2354e-05,\n 3.1138e-05, 5.7836e-05, 4.2856e-05, 3.0896e-05, 3.7811e-05, 3.3821e-05,\n 3.4407e-05, 7.5980e-05, 2.5047e-05, 3.7260e-05, 1.2794e-08, 1.4121e-08,\n 4.0070e-05, 7.8114e-05, 4.7140e-05, 2.9760e-05, 3.6503e-05, 3.3174e-05,\n 3.0037e-05, 4.4816e-05, 2.9107e-05, 6.8634e-06, 2.7027e-05, 3.5038e-05,\n 2.7443e-05, 2.4536e-05, 1.2112e-06, 2.5357e-05, 3.4235e-05, 2.3028e-05,\n 1.2855e-08, 2.6076e-05, 3.4987e-05, 1.0413e-09, 3.1330e-09, 3.4512e-05,\n 3.6148e-05, 3.9249e-05, 2.1503e-05, 3.6551e-05, 1.8796e-05, 3.1502e-05,\n 2.9118e-05, 1.8537e-05, 1.9520e-05, 3.6658e-05, 3.3174e-05, 2.7364e-05,\n 4.3295e-05, 4.4067e-05, 3.7220e-05, 2.3796e-05, 3.1854e-05, 9.6650e-05,\n 2.6350e-05, 3.1836e-05, 1.7949e-05, 3.0459e-05, 3.4244e-05, 6.2472e-05,\n 8.2721e-06, 2.5630e-05, 4.4750e-05, 2.3149e-05, 3.6748e-05, 2.6096e-05,\n 3.8793e-05, 2.5322e-05, 3.3730e-05, 4.4914e-05, 6.3714e-06, 3.5338e-08,\n 1.8693e-08, 2.3696e-05, 2.8240e-05, 3.4460e-05, 3.2027e-05, 3.5579e-08,\n 2.7633e-05, 3.2844e-05, 1.9751e-05, 3.5034e-05, 2.7730e-05, 2.5388e-05,\n 4.8436e-05, 3.9564e-05, 3.1841e-05, 6.3018e-05, 3.7408e-05, 4.8565e-05,\n 2.6360e-05, 3.9256e-05, 3.5083e-05, 3.4454e-05, 4.5758e-05, 6.0806e-05,\n 4.2985e-05, 2.0496e-05, 2.9045e-05, 2.9166e-05, 3.3929e-05, 2.4145e-05,\n 2.7234e-05, 3.2736e-05, 6.2173e-05, 5.3642e-05, 3.6644e-05, 3.8516e-05,\n 3.4835e-05, 1.5030e-05, 1.7896e-05, 3.3204e-05, 2.9165e-05, 3.3108e-05,\n 1.9299e-05, 1.2427e-08, 3.2354e-08, 7.5531e-06, 3.7234e-05, 3.2571e-05,\n 3.3181e-05, 3.3094e-05, 3.0623e-05, 2.4267e-05, 2.4659e-05, 4.7492e-05,\n 2.5689e-05, 5.9926e-09, 8.0428e-06, 3.9438e-05, 2.8938e-05, 4.7946e-05,\n 3.3418e-05, 2.8880e-05, 8.5901e-09, 2.2376e-05], device='cuda:0')" + "step": "tensor(18780.)", + "exp_avg": "tensor([-8.9116e-04, 5.2815e-04, -1.0678e-04, ..., -1.4846e-05,\n -4.2124e-04, -2.2836e-04], device='cuda:0')", + "exp_avg_sq": "tensor([1.2426e-05, 1.1560e-05, 3.5580e-07, ..., 1.2203e-05, 1.3738e-05,\n 5.2471e-06], device='cuda:0')" }, "2": { - "step": "tensor(7512.)", - "exp_avg": "tensor([[ 5.6198e-06, -1.5067e-05, 3.8120e-05, ..., 3.6708e-06,\n -7.9779e-15, 1.5094e-05],\n [ 7.6120e-07, -2.6588e-05, 9.2524e-05, ..., 1.5356e-05,\n -2.8971e-15, -2.8584e-06],\n [ 4.8004e-06, 3.8724e-05, 2.9184e-05, ..., -6.7134e-06,\n -9.3192e-16, -3.1998e-05],\n ...,\n [ 1.3938e-05, -1.9265e-05, -8.7151e-07, ..., -1.8440e-05,\n 9.5435e-15, -3.3535e-05],\n [ 2.1527e-07, 6.5886e-07, 3.0513e-06, ..., -3.3036e-05,\n -3.9366e-15, -2.0113e-05],\n [ 6.0556e-06, 5.2375e-05, 2.7317e-05, ..., -1.1747e-05,\n 3.7151e-15, 2.9616e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.0550e-09, 4.0851e-09, 5.4619e-09, ..., 5.2441e-09, 4.7423e-12,\n 4.1123e-09],\n [2.4361e-09, 1.3603e-08, 1.0108e-08, ..., 1.1028e-08, 3.5529e-12,\n 5.9625e-09],\n [1.3159e-09, 8.5150e-09, 1.0370e-08, ..., 7.8096e-09, 7.0392e-12,\n 1.0625e-08],\n ...,\n [2.3517e-09, 8.5889e-09, 1.2553e-08, ..., 9.4604e-09, 7.5548e-12,\n 1.3323e-08],\n [2.1186e-09, 8.7608e-09, 9.9121e-09, ..., 9.1695e-09, 5.5073e-12,\n 3.0985e-08],\n [2.3974e-09, 9.1584e-09, 1.2790e-08, ..., 1.4243e-08, 1.0875e-11,\n 6.2020e-09]], device='cuda:0')" + "step": "tensor(18780.)", + "exp_avg": "tensor([[ 9.6772e-08, -1.0532e-06, -1.2555e-10, ..., 1.5626e-05,\n -8.0213e-08, -1.7194e-06],\n [-2.1519e-06, 6.5058e-08, 4.3754e-07, ..., 7.2337e-06,\n 2.1542e-06, 4.3289e-07],\n [ 7.4308e-07, 1.3095e-06, 4.7630e-07, ..., -2.7519e-06,\n -1.1673e-06, -4.7206e-07],\n ...,\n [ 0.0000e+00, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 0.0000e+00],\n [-2.4431e-05, -1.0426e-05, -8.9766e-07, ..., -7.8740e-06,\n 1.4950e-06, 1.9064e-06],\n [-5.0004e-06, -1.3894e-06, -1.6448e-07, ..., -3.5150e-07,\n -2.9186e-06, 2.2330e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.9107e-10, 2.0137e-10, 1.6040e-11, ..., 1.3391e-09, 3.8042e-10,\n 4.4457e-10],\n [3.2800e-09, 7.9614e-11, 1.2203e-09, ..., 7.1816e-10, 6.8579e-10,\n 5.4440e-11],\n [6.2980e-10, 2.5110e-09, 2.3256e-11, ..., 1.2627e-09, 2.4084e-09,\n 4.4991e-11],\n ...,\n [0.0000e+00, 3.7959e-24, 0.0000e+00, ..., 1.1120e-25, 1.3281e-25,\n 0.0000e+00],\n [4.9476e-09, 4.5447e-10, 5.3518e-10, ..., 2.9369e-09, 6.7674e-10,\n 5.5379e-10],\n [1.1652e-09, 1.1346e-09, 1.8738e-11, ..., 4.8805e-10, 4.4340e-09,\n 2.8373e-10]], device='cuda:0')" }, "3": { - "step": "tensor(7512.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 4.5510e-06, -2.9309e-05, 9.1462e-07, ..., 8.2961e-06,\n 1.2030e-05, -8.5270e-07],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-1.4736e-05, -1.5622e-06, 7.3517e-06, ..., -8.8944e-06,\n -4.3507e-06, -9.2521e-06],\n [ 3.7801e-06, -1.0272e-05, -7.0826e-06, ..., -5.0855e-06,\n -8.7793e-06, -9.6691e-06],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.1141e-12, 1.4231e-12, 1.1524e-14, ..., 1.4970e-18, 5.3786e-14,\n 4.5932e-14],\n [8.8933e-10, 3.0109e-09, 2.2738e-10, ..., 2.4691e-10, 2.6338e-10,\n 1.8106e-10],\n [9.2400e-14, 1.3934e-13, 5.1435e-14, ..., 8.9105e-15, 1.2641e-14,\n 3.8168e-14],\n ...,\n [3.6251e-09, 5.2148e-09, 7.4398e-10, ..., 9.3777e-10, 1.7260e-09,\n 1.5085e-09],\n [3.9929e-09, 3.1558e-09, 8.3302e-10, ..., 1.3373e-09, 8.3181e-10,\n 8.2235e-10],\n [7.1817e-12, 6.0487e-12, 5.4617e-13, ..., 6.7548e-13, 1.1378e-12,\n 3.7960e-13]], device='cuda:0')" + "step": "tensor(18780.)", + "exp_avg": "tensor([ 3.4621e-05, -6.0521e-06, -1.3927e-05, 2.7677e-05, 1.9649e-04,\n -8.7466e-08, -6.9484e-05, 3.6905e-05, 8.1492e-06, 1.1586e-04,\n 9.9203e-05, 3.0083e-05, -2.7508e-05, 9.2706e-07, 8.3822e-06,\n 3.8077e-05, 5.3092e-05, -2.1306e-05, 3.4841e-05, 8.7388e-05,\n -8.3397e-05, 1.5005e-05, -3.9497e-05, 3.3934e-05, 5.4561e-05,\n -3.4047e-05, -2.1889e-05, 5.6052e-45, 4.0301e-07, -5.3232e-05,\n -1.5858e-04, -4.6528e-05, 3.1181e-05, -1.2903e-04, -9.3209e-07,\n -9.6643e-06, -5.3859e-05, 6.2221e-06, -5.8116e-05, -7.1692e-05,\n -2.5834e-05, 9.3747e-05, 4.6714e-05, -3.0028e-04, 2.0044e-05,\n 1.9370e-05, -1.1049e-05, -1.6318e-05, 3.2885e-04, 3.2401e-05,\n 5.3868e-05, 1.0893e-04, 7.5321e-05, -1.5563e-05, -4.9240e-05,\n 6.2830e-05, -2.3298e-05, -1.0455e-05, 2.1689e-05, -7.1596e-05,\n -4.7518e-06, -5.9887e-06, 2.5265e-04, 1.1654e-04, 2.1767e-06,\n -4.7493e-05, 3.4157e-05, -8.7365e-06, 4.9349e-05, 3.1030e-05,\n 1.3183e-04, 5.8310e-05, 2.6635e-05, 1.8488e-04, -6.2842e-06,\n -5.8040e-05, 5.9252e-06, -2.0430e-05, 1.5206e-05, 7.3990e-05,\n 7.9493e-05, 7.1360e-05, -9.5531e-06, 5.6052e-45, -8.4575e-06,\n -1.4190e-04, 1.0055e-05, -1.4973e-05, 2.9536e-05, 3.1875e-05,\n -6.4492e-05, -6.7156e-06, 1.0812e-05, 1.6367e-05, -7.1373e-06,\n 7.0857e-05, -2.9297e-04, -8.8835e-06, -6.3592e-05, 5.6939e-05,\n 7.0043e-05, 5.4977e-05, 6.2749e-05, -9.1496e-05, 5.4047e-05,\n -2.5317e-05, 4.2599e-05, 4.9410e-05, -4.7243e-05, 4.1673e-06,\n -4.1147e-05, 1.8143e-05, -1.8343e-05, 6.9375e-05, -1.1256e-05,\n -3.5846e-05, 1.2742e-04, -1.7592e-05, 2.5175e-05, 5.6052e-45,\n 1.3831e-04, 8.3052e-05, -6.6306e-05, -1.5544e-06, 2.9506e-05,\n -5.7608e-05, 4.4366e-05, -5.9751e-06, 5.0357e-05, 1.1545e-04,\n 3.2671e-06, 3.9868e-05, 5.6052e-45, -1.4541e-06, -4.0457e-05,\n 1.4024e-05, 1.4308e-06, 1.3072e-05, 2.6705e-05, 7.4651e-06,\n -1.0418e-05, 5.6052e-45, -2.2471e-05, 4.4268e-05, 5.6052e-45,\n 5.6052e-45, -1.2141e-04, -3.3402e-05, 5.6052e-45, -4.1864e-05,\n 6.6125e-05, 3.5965e-05, 7.8428e-05, 7.8908e-06, 2.6316e-06,\n 5.3956e-06, -2.2430e-05, 6.0428e-05, 7.2434e-23, 1.5445e-04,\n -5.1545e-05, -5.0552e-05, 4.6229e-05, -1.3030e-04, 3.9739e-06,\n -9.9830e-06, -2.1256e-05, 5.6052e-45, 4.5182e-05, -1.8742e-05,\n 2.6369e-07, 1.7088e-04, 2.3230e-04, 1.7571e-05, 4.7834e-05,\n -1.8539e-05, 2.2203e-05, 8.1563e-06, -7.1588e-05, -5.6231e-04,\n -5.5367e-05, -1.5159e-04, 4.9570e-05, 3.4521e-06, -2.3607e-05,\n 5.6052e-45, 5.7247e-05, -5.6846e-05, -6.2498e-05, 3.2731e-05,\n -7.0805e-06, 2.4602e-04, 1.2322e-05, 2.8985e-05, -1.4646e-04,\n -1.3002e-06, -1.5207e-05, 7.1319e-05, 1.5251e-04, 5.6052e-45,\n 6.8139e-06, -7.8416e-05, 6.9958e-05, 3.0753e-05, -2.5698e-06,\n 7.7584e-05, 2.4025e-06, 2.8652e-06, 4.6214e-05, -5.4871e-06,\n -3.5621e-05, 2.3531e-05, 2.0597e-05, 2.5664e-04, -1.4806e-05,\n 1.1858e-04, 2.0523e-05, -2.4776e-05, -9.4673e-05, -6.5493e-05,\n 1.9578e-06, 9.9904e-05, 1.1499e-06, 2.8570e-05, 3.2551e-05,\n 1.8976e-05, 3.7530e-05, -6.1313e-05, -2.4245e-04, -7.9631e-05,\n 3.5019e-05, 1.1754e-04, 5.1664e-05, -8.4095e-06, 5.6052e-45,\n 8.9030e-05, -1.0864e-06, 5.6052e-45, -5.6184e-05, -5.9273e-05,\n -5.1859e-06, -3.9976e-05, -3.5893e-06, 7.4794e-05, -3.6001e-05,\n 2.6641e-05, 1.0081e-04, 1.6413e-05, 8.5687e-05, -1.5802e-05,\n 2.3508e-05, -3.4748e-05, -6.2925e-05, 1.2951e-05, 7.1622e-05,\n -1.5521e-04, 5.6052e-45, 3.1477e-05, 5.5820e-06, 1.2455e-06,\n 5.6052e-45, -2.5469e-05, 1.5011e-05, 4.3112e-05, 8.8351e-05,\n -2.1071e-05, 6.8267e-06, 9.1923e-06, -1.6188e-05, 9.0281e-06,\n 6.6697e-05, -9.1997e-05, 1.0678e-05, 5.0791e-05, -1.8321e-04,\n -2.7905e-05, -2.0656e-05, -2.3165e-05, 1.5472e-05, 4.4849e-06,\n 3.0433e-05, -2.4607e-04, -2.0299e-05, -1.8656e-04, 3.2931e-05,\n -5.4093e-05, -8.1215e-05, -6.0259e-05, -1.3018e-04, -5.8325e-05,\n 8.3057e-05, 6.2582e-05, 2.1311e-05, 5.3867e-05, 3.7336e-05,\n -5.1771e-05, 4.8357e-05, -1.0513e-04, -5.0089e-05, 4.1601e-05,\n 5.3059e-05, -9.3722e-05, -2.6672e-05, -1.8929e-04, -3.1064e-04,\n -5.0579e-05, 2.7375e-05, -2.1854e-05, 2.2295e-05, 2.8483e-05,\n -1.0824e-04, 5.6052e-45, 2.3209e-04, 3.7342e-05, -8.2977e-07,\n 5.6052e-45, 2.1774e-06, -3.2904e-04, -7.0025e-06, 3.1312e-05,\n -3.7955e-05, -1.0741e-05, -2.7321e-05, 9.5664e-06, 2.2882e-05,\n 5.6052e-45, 5.3040e-05, 5.6968e-05, -5.7241e-06, -2.1558e-04,\n 2.5536e-05, 5.3847e-05, 9.5172e-06, 1.1613e-05, -1.1577e-04,\n -2.7204e-06, 4.1264e-05, -3.9049e-05, 5.6052e-45, -4.9913e-05,\n -1.7061e-05, -6.5952e-05, 6.5990e-05, -5.0633e-05, 1.0183e-05,\n -6.9365e-05, -3.3342e-05, 4.4069e-05, 3.1668e-06, -3.5195e-04,\n 9.2476e-05, -2.9790e-05, -6.4458e-05, 5.6052e-45, -6.1499e-05,\n 1.5902e-05, -7.5370e-06, -9.0981e-06, -7.3380e-05, 4.6427e-06,\n 3.4334e-06, 3.5227e-05, 5.6052e-45, 7.4908e-05, 9.8139e-06,\n 3.7080e-05, -1.5568e-05, 4.6352e-05, -9.9362e-05, 1.1216e-05,\n 1.9224e-05, 5.6379e-05, 6.4505e-05, 1.0119e-05, 1.1311e-05,\n -4.9309e-05, 3.2083e-05, -3.4200e-05, -1.2884e-05, 3.3517e-05,\n -1.1163e-04, -1.8734e-06, -2.3002e-05, 5.8747e-05, 5.6052e-45,\n 2.6051e-05, 4.5629e-05, -6.1667e-05, 9.8267e-05, -1.4520e-04,\n -5.7093e-05, -2.3068e-04, -3.6583e-05, 1.7904e-05, -1.0868e-06,\n 6.9979e-06, -6.5569e-06, 2.4962e-06, -1.6254e-05, 6.6921e-05,\n -3.0090e-07, 5.9150e-05, -2.7609e-05, 2.7635e-05, 1.9158e-05,\n -1.9735e-05, 3.6435e-06, 3.1665e-05, 4.7517e-05, -4.5698e-06,\n 6.6851e-05, 5.6052e-45, -2.4333e-06, -6.7531e-05, -2.7845e-05,\n 4.5965e-05, -3.2364e-05, 1.1591e-04, 4.2100e-05, 1.0191e-05,\n -1.1190e-04, -6.6697e-05, -7.2836e-04, 7.4948e-06, -1.6169e-07,\n -8.0171e-05, 1.2355e-05, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 6.1278e-07, 2.0237e-05, 5.6832e-05, 4.8437e-05, -5.4610e-05,\n -3.9396e-05, 9.7661e-05, -1.8056e-05, -4.3841e-06, 4.7676e-05,\n -4.2848e-05, -2.0704e-05, 1.5575e-05, 4.3188e-14, -1.3557e-05,\n -2.0724e-04, 3.5533e-05, -6.6434e-05, -2.4944e-05, 6.2083e-06,\n -2.8106e-06, 3.0808e-05, -1.1815e-04, -3.9371e-05, 1.4347e-04,\n 2.6683e-05, 2.5051e-05, 2.0700e-04, 5.6052e-45, -2.9975e-05,\n 1.2963e-04, 3.7816e-05, -2.6812e-05, -4.7215e-06, -8.4189e-06,\n -2.5618e-04, -7.0430e-06, -2.6042e-05, 1.7942e-05, -6.1796e-05,\n 2.1896e-05, 5.6052e-45, 7.7582e-05, 9.2526e-05, 9.3560e-06,\n -3.9224e-05, -1.0298e-05, -1.0999e-04, 8.5143e-06, 8.3660e-05,\n -2.2068e-05, -6.1433e-05, 6.4447e-05, 1.4164e-05, -5.3610e-05,\n 1.0821e-04, -5.3915e-05, 1.4501e-05, -6.7559e-05, -1.7145e-05,\n -1.5610e-05, -1.5135e-05, -2.3199e-04, -4.6426e-05, -1.5156e-05,\n 1.3839e-06, 9.8665e-05, 3.5296e-05, -1.5212e-04, 2.3304e-05,\n -2.6134e-05, -2.2886e-05, -2.1840e-05, -2.6991e-05, 3.1321e-07,\n 2.9176e-05, -1.5876e-04, 6.9326e-05, 8.0643e-06, 4.6425e-05,\n 1.6585e-04, -3.5167e-04, 9.0180e-05, -1.2052e-05, -8.3919e-06,\n 2.7788e-05, 6.8199e-06, 2.2000e-05, 3.1140e-06, 7.9657e-05,\n -5.9095e-05, 9.2296e-05, 2.1499e-05, -2.2882e-05, 9.8176e-06,\n -8.0766e-05, -2.0777e-05, 3.8651e-06, 6.4188e-06, 8.7022e-05,\n 2.8527e-05, 1.4150e-04, 3.3357e-05, 1.6380e-05, -7.7408e-06,\n -5.6183e-05, -5.5269e-05, 5.6052e-45, 3.4691e-05, -2.6483e-05,\n -6.5507e-05, 2.1055e-05, -1.0232e-04, -8.1834e-05, -3.6826e-05,\n 5.6052e-45, 5.0015e-06, -4.6686e-06, 1.7011e-05, 3.5525e-06,\n 4.2016e-05, 4.5872e-05, 3.1321e-05, 6.9188e-05, -1.2838e-05,\n -3.1031e-04, -1.2314e-05, -4.8457e-05, 2.1957e-05, -2.4296e-06,\n 5.6052e-45, -2.2287e-05, 7.6566e-05, -3.1431e-05, 2.7859e-05,\n -6.9728e-06, 6.9981e-05, 5.6052e-45, 2.3714e-05, 5.6052e-45,\n -2.8836e-05, -5.6424e-05, 3.3518e-05, -5.3906e-05, 1.1802e-07,\n 1.0813e-04, 8.4948e-05, 1.3882e-05, -1.9553e-05, 3.0755e-05,\n 4.9711e-05, 2.9642e-05, -9.4276e-06, -1.9455e-05, 5.6052e-45,\n -9.6487e-05, -2.4991e-05, 3.6438e-05, 4.6644e-05, -8.4187e-05,\n 1.4322e-04, 5.6052e-45, 8.3898e-06, -5.4311e-05, -4.2238e-05,\n -4.6316e-06, -1.6911e-06, 3.0631e-05, -1.2099e-04, -2.1622e-05,\n 4.6824e-05, 5.7061e-05, 8.7077e-05, -8.1075e-05, -9.2136e-05,\n -2.2864e-05, 1.6268e-04, 6.6783e-06, -6.6767e-06, -3.5497e-05,\n 6.6689e-05, 1.9343e-05, -1.3976e-05, -4.1980e-05, 5.9356e-05,\n 5.6052e-45, 1.0175e-04, 5.6052e-45, -2.6307e-05, 2.6082e-04,\n 1.7146e-05, 1.7511e-04, 1.3585e-04, 4.7737e-05, -3.9206e-05,\n -4.1410e-05, 5.6052e-45, -2.5839e-05, -2.7504e-05, 8.8669e-06,\n 1.4107e-05, 2.1480e-06, 5.6052e-45, 5.6052e-45, 1.8377e-04,\n 6.1488e-05, 5.6052e-45, 9.4101e-08, -2.6616e-05, -1.3172e-05,\n -2.7555e-05, 5.6052e-45, 5.7820e-06, 3.0213e-05, -5.9562e-05,\n 2.4865e-05, 1.8640e-04, -2.0979e-04, -8.9726e-05, -8.5237e-06,\n 7.1102e-06, -9.1984e-06, 1.5400e-04, 9.3750e-05, 7.0515e-05,\n -7.2547e-05, 1.1751e-05, 4.9106e-05, 4.3010e-05, 2.2399e-06,\n -7.9263e-05, 1.1624e-05, 6.4612e-05, -1.3558e-05, 1.1426e-04,\n -9.0544e-05, -1.3019e-05, 1.9741e-05, -1.8260e-06, -2.5957e-06,\n -8.2341e-05, 5.6052e-45, -4.1657e-05, -9.4084e-05, 2.1908e-05,\n 5.2086e-05, 5.1069e-05, 1.2063e-04, 6.4987e-05, 5.6052e-45,\n -5.1128e-05, 2.6917e-05, -9.4591e-05, 3.6994e-06, 2.3239e-05,\n 1.5340e-05, 4.6010e-05, 3.2306e-05, -1.8904e-06, -1.0776e-04,\n -8.2568e-05, 4.4115e-05, 6.7820e-06, -3.9666e-05, 1.0444e-04,\n 1.6581e-04, -3.9526e-05, -2.6818e-05, 9.8121e-05, -9.7720e-05,\n 6.0927e-05, 1.3733e-04, -5.6897e-05, 5.9973e-05, 3.4027e-05,\n 2.8965e-05, 5.6052e-45, -2.5067e-04, -1.3548e-05, 5.6052e-45,\n 1.6312e-04, -4.4524e-05, 2.5449e-05, -1.0973e-05, -2.4280e-05,\n -6.6169e-05, 3.0060e-05, -1.3573e-04, 1.1367e-04, 5.7665e-05,\n 5.6052e-45, -1.0507e-04, -4.3739e-05, 1.5826e-05, 7.8578e-05,\n -1.4074e-06, -7.1694e-06, 5.2194e-05, 1.2166e-05, -1.6569e-05,\n -1.1435e-05, -1.9850e-04, -7.5421e-06, -3.7642e-06, -8.7315e-05,\n 2.1042e-06, -1.0411e-04, 3.8255e-05, 5.8752e-05, -3.1294e-06,\n 3.0108e-05, -5.1892e-05, -4.7300e-06, 5.7357e-05, -7.6675e-05,\n -1.2139e-04, -1.2375e-04, 4.0550e-05, 3.6853e-05, 3.6114e-05,\n -1.1478e-05, -4.8477e-05, 5.6907e-06, 2.1076e-05, -4.6094e-05,\n -3.4281e-05, 3.5300e-06, 7.8591e-05, 6.4755e-06, 1.2495e-04,\n 4.2596e-05, -3.7667e-05, 7.5786e-06, -2.5998e-05, 5.6052e-45,\n 5.6052e-45, -1.8831e-05, 2.3855e-05], device='cuda:0')", + "exp_avg_sq": "tensor([5.1733e-08, 6.5139e-08, 7.4993e-08, 6.3792e-08, 9.7655e-08, 1.4544e-10,\n 9.4513e-08, 5.8514e-08, 4.1454e-08, 7.6851e-08, 7.4335e-08, 3.2196e-08,\n 6.5377e-08, 3.4610e-08, 5.6689e-08, 8.2312e-08, 6.2317e-08, 5.5964e-08,\n 7.2666e-08, 4.5827e-08, 4.5688e-08, 5.9624e-08, 5.0490e-08, 8.2817e-08,\n 1.0523e-07, 1.1358e-07, 5.9700e-08, 4.2226e-16, 7.4605e-08, 7.3897e-08,\n 3.6298e-08, 7.6501e-08, 5.5518e-08, 7.1700e-08, 4.9715e-08, 8.2717e-08,\n 4.1432e-08, 5.4523e-08, 6.7473e-08, 6.0442e-08, 8.4114e-08, 7.0490e-08,\n 9.5622e-08, 8.5495e-08, 6.1315e-08, 5.1458e-08, 6.2698e-08, 9.5424e-08,\n 1.3744e-07, 8.9494e-08, 2.4491e-08, 1.3077e-07, 5.1284e-08, 4.8026e-08,\n 6.1779e-08, 4.7198e-08, 8.9901e-08, 4.8077e-08, 3.6177e-08, 6.2354e-08,\n 3.7153e-08, 5.4477e-08, 1.1116e-07, 4.3066e-08, 2.8787e-08, 6.6478e-08,\n 8.9773e-08, 6.3090e-08, 3.6288e-08, 5.9517e-08, 7.0789e-08, 1.5906e-08,\n 4.7152e-08, 1.7086e-07, 1.1540e-07, 8.8896e-08, 7.1204e-08, 1.0396e-07,\n 5.8446e-08, 8.6582e-08, 5.4011e-08, 7.4989e-08, 7.8420e-08, 3.3266e-15,\n 6.5792e-08, 7.7432e-08, 5.2886e-08, 9.8904e-08, 7.2398e-08, 1.0537e-07,\n 9.3548e-08, 5.8127e-08, 6.3486e-08, 7.3023e-08, 4.9420e-08, 6.8831e-08,\n 8.8096e-08, 4.5104e-08, 5.6507e-08, 7.9698e-08, 8.2533e-08, 5.7521e-08,\n 1.6827e-07, 5.5386e-08, 7.7199e-08, 6.9588e-08, 4.9881e-08, 5.2423e-08,\n 5.1621e-08, 4.2919e-08, 5.6354e-08, 9.2721e-08, 5.2150e-08, 3.9103e-08,\n 7.2927e-08, 6.5995e-08, 6.6699e-08, 6.0091e-08, 3.8993e-08, 8.6444e-14,\n 6.2582e-08, 7.0630e-08, 5.2088e-08, 7.6348e-10, 5.2966e-08, 7.3546e-08,\n 7.0197e-08, 5.9708e-08, 8.2769e-08, 6.1490e-08, 5.4793e-08, 5.4650e-08,\n 9.4625e-15, 8.3737e-08, 1.1859e-07, 7.6716e-08, 1.1429e-08, 9.1325e-08,\n 5.1076e-08, 5.3124e-08, 2.0798e-08, 5.2463e-14, 5.9291e-08, 5.5627e-08,\n 2.0326e-16, 2.7036e-13, 9.2450e-08, 5.6690e-08, 7.2638e-13, 4.1726e-08,\n 7.2601e-08, 1.0597e-07, 6.3963e-08, 6.9112e-08, 4.6151e-08, 7.0561e-08,\n 5.7743e-08, 8.0653e-08, 2.9925e-13, 4.5359e-08, 1.1515e-07, 1.1109e-07,\n 1.0016e-07, 9.9966e-08, 4.4450e-08, 6.0588e-08, 2.2971e-08, 3.8218e-19,\n 6.7363e-08, 6.1889e-08, 1.0148e-07, 8.4313e-08, 6.6332e-08, 8.5170e-08,\n 5.6598e-08, 7.8583e-08, 6.0830e-08, 9.0525e-08, 5.4514e-08, 1.2473e-07,\n 8.2576e-08, 7.2221e-08, 8.2329e-08, 7.4781e-08, 1.2159e-08, 2.2329e-22,\n 7.9926e-08, 6.3625e-08, 4.4455e-08, 6.9098e-08, 3.8461e-08, 6.9716e-08,\n 8.6357e-08, 9.9792e-08, 8.5500e-08, 4.2147e-08, 1.0061e-07, 6.7712e-08,\n 7.3601e-08, 2.4458e-24, 3.5127e-08, 4.1323e-08, 9.9198e-08, 6.1629e-08,\n 9.4298e-08, 7.2132e-08, 6.2479e-08, 4.3293e-08, 3.8385e-08, 8.3729e-09,\n 8.0848e-08, 1.6289e-08, 5.0975e-08, 1.0613e-07, 7.5641e-08, 8.6097e-08,\n 7.8037e-08, 5.1198e-08, 6.1306e-08, 5.9772e-08, 3.3729e-08, 8.8432e-08,\n 1.0220e-07, 6.1130e-08, 2.8938e-08, 2.4178e-08, 8.7599e-08, 1.0851e-07,\n 9.4737e-08, 2.3968e-08, 5.5698e-08, 5.8258e-08, 8.3814e-08, 1.6006e-07,\n 2.2914e-13, 1.0523e-07, 5.8991e-08, 4.3482e-17, 5.4626e-08, 6.9212e-08,\n 2.9142e-08, 3.7989e-08, 8.5437e-08, 7.7721e-08, 8.7182e-08, 1.0271e-07,\n 8.3271e-08, 6.1507e-08, 8.4400e-08, 6.9125e-10, 8.1785e-08, 1.1489e-07,\n 6.0956e-08, 6.2118e-08, 6.2729e-08, 1.2150e-07, 4.6486e-17, 5.0576e-08,\n 6.8507e-08, 1.3869e-07, 2.7374e-22, 5.9920e-08, 7.3560e-08, 4.0483e-08,\n 7.2954e-08, 4.5384e-08, 3.5992e-07, 4.9700e-08, 6.9390e-08, 1.0258e-07,\n 6.8362e-08, 7.8076e-08, 4.8698e-08, 6.2776e-08, 1.0741e-07, 5.8319e-08,\n 8.7868e-08, 1.1089e-08, 4.2304e-08, 1.2575e-07, 1.8326e-09, 1.2150e-07,\n 4.0222e-08, 1.1189e-07, 6.7959e-08, 7.5609e-08, 1.1840e-07, 8.2585e-08,\n 6.8389e-08, 7.1487e-08, 6.0132e-08, 4.4421e-08, 3.6537e-08, 4.8270e-08,\n 7.8042e-08, 3.5766e-08, 5.3157e-08, 7.7624e-08, 4.7726e-08, 3.4542e-08,\n 9.2564e-08, 6.0190e-08, 5.1858e-08, 5.8331e-08, 1.0297e-07, 3.0635e-08,\n 3.1734e-08, 3.6637e-08, 9.4198e-08, 8.1687e-08, 6.0947e-08, 3.9238e-17,\n 6.4168e-08, 5.0018e-08, 8.5594e-08, 7.7940e-17, 3.3873e-08, 1.2006e-07,\n 3.4679e-08, 5.2280e-08, 9.8159e-08, 1.9579e-08, 4.8499e-08, 6.5010e-08,\n 4.5723e-08, 2.8277e-13, 6.4553e-08, 8.0766e-08, 5.8509e-08, 8.3521e-08,\n 7.8720e-08, 3.4375e-08, 6.4446e-08, 7.7281e-08, 4.1608e-08, 1.2982e-07,\n 6.2329e-08, 7.4233e-08, 2.1238e-22, 3.9934e-08, 1.1078e-07, 8.2094e-08,\n 8.9255e-08, 4.1956e-08, 8.8242e-08, 6.6014e-08, 9.5264e-08, 3.9981e-08,\n 1.0527e-07, 7.4550e-08, 8.7873e-08, 6.1998e-08, 4.8800e-08, 5.7819e-14,\n 6.5206e-08, 1.3130e-07, 4.1007e-08, 3.9246e-08, 6.7999e-08, 5.9658e-08,\n 5.8657e-08, 8.3163e-08, 2.3979e-13, 6.2605e-08, 4.1870e-08, 4.2187e-08,\n 6.3657e-08, 8.3571e-08, 6.7468e-08, 5.3931e-08, 3.8636e-08, 3.3801e-08,\n 5.5935e-08, 6.8205e-08, 6.9854e-08, 5.7646e-08, 4.6696e-08, 6.4766e-08,\n 3.7573e-09, 3.7450e-08, 1.3478e-07, 6.6622e-08, 4.1628e-08, 7.0268e-08,\n 2.3791e-13, 1.0459e-07, 8.3949e-08, 1.0852e-07, 8.6452e-08, 3.7938e-08,\n 6.8281e-08, 1.0601e-07, 7.7421e-08, 2.3529e-08, 7.4921e-08, 7.2947e-08,\n 3.5098e-08, 2.0988e-08, 8.3058e-10, 4.5722e-08, 6.0156e-08, 1.2132e-07,\n 8.2193e-08, 1.3972e-07, 7.1527e-08, 9.8369e-08, 3.5039e-08, 7.3941e-08,\n 6.6052e-08, 8.8704e-08, 1.3233e-07, 1.9698e-12, 3.4003e-08, 7.5907e-08,\n 5.0273e-08, 7.4030e-08, 2.7230e-08, 8.2566e-08, 8.5051e-08, 6.9269e-08,\n 6.9019e-08, 8.9281e-08, 1.4424e-07, 3.2890e-08, 1.6842e-10, 6.3726e-08,\n 2.7816e-08, 5.4954e-14, 5.9887e-16, 3.1595e-16, 6.6819e-08, 8.7609e-08,\n 4.8798e-08, 6.7729e-08, 8.3521e-08, 4.1633e-08, 8.8071e-08, 9.9096e-08,\n 8.8916e-08, 1.6401e-08, 4.0651e-08, 4.9328e-08, 6.6440e-08, 9.9954e-16,\n 6.9833e-08, 1.2116e-07, 6.5253e-08, 5.6414e-08, 5.7969e-08, 4.2145e-08,\n 1.2149e-07, 4.8442e-08, 6.3702e-08, 6.9232e-08, 7.8781e-08, 8.2569e-08,\n 5.6672e-08, 1.2248e-07, 3.0960e-14, 7.1951e-08, 5.3978e-08, 2.2468e-08,\n 1.6471e-08, 5.9664e-08, 1.0468e-07, 8.3872e-08, 5.3903e-08, 4.9501e-08,\n 8.0004e-08, 5.5487e-08, 4.2959e-08, 3.0535e-15, 4.5919e-08, 9.7240e-08,\n 4.3107e-08, 4.5804e-08, 6.6736e-08, 6.4053e-08, 3.5367e-08, 8.8355e-08,\n 7.2146e-08, 6.5538e-08, 5.6178e-08, 8.1607e-08, 7.9206e-08, 6.6707e-08,\n 8.8734e-08, 9.8253e-08, 7.1382e-08, 6.2393e-08, 6.0621e-08, 6.7563e-08,\n 8.7647e-08, 3.5188e-08, 8.5301e-08, 4.8417e-08, 6.0264e-08, 6.7626e-08,\n 8.3093e-08, 3.0769e-08, 1.2722e-08, 1.3643e-07, 5.8077e-08, 7.2089e-08,\n 7.0575e-08, 4.1742e-08, 1.0339e-07, 1.0056e-07, 1.0367e-07, 7.6957e-08,\n 7.5685e-08, 1.0819e-07, 1.9749e-07, 1.0103e-07, 5.8948e-08, 8.6604e-08,\n 6.7058e-08, 4.1478e-08, 2.4701e-08, 7.4054e-08, 5.6126e-08, 5.4482e-08,\n 8.8206e-08, 6.1550e-08, 6.5286e-08, 1.0523e-07, 2.1408e-08, 1.1047e-07,\n 3.8747e-08, 6.9196e-08, 6.6081e-08, 7.1745e-08, 6.2005e-08, 7.4106e-08,\n 7.8320e-08, 6.8285e-08, 6.7165e-08, 7.5601e-13, 5.7002e-08, 8.0443e-08,\n 9.7117e-08, 8.8634e-08, 9.8513e-08, 1.4389e-07, 6.1703e-08, 2.7248e-15,\n 8.3891e-08, 9.0988e-08, 8.6101e-08, 6.1586e-08, 7.4822e-08, 9.1865e-08,\n 6.8618e-08, 6.8501e-08, 6.7168e-08, 1.1386e-07, 7.7982e-08, 8.9878e-08,\n 7.1655e-08, 5.2569e-08, 5.1488e-17, 8.2820e-08, 1.0577e-07, 5.0618e-08,\n 7.9543e-08, 8.5270e-08, 5.4401e-08, 1.0779e-15, 6.7129e-08, 5.2576e-14,\n 9.7885e-08, 5.5657e-08, 8.8299e-08, 7.4575e-08, 3.3544e-08, 4.9183e-08,\n 7.2579e-08, 3.8436e-08, 4.8162e-08, 5.2726e-08, 7.7683e-08, 3.2712e-08,\n 4.1895e-08, 2.1896e-08, 1.8150e-20, 9.1559e-08, 7.3914e-08, 4.5169e-08,\n 7.1327e-08, 3.9344e-08, 7.0694e-08, 2.1099e-17, 4.4817e-08, 6.1898e-08,\n 5.7717e-08, 4.0337e-08, 3.3079e-08, 5.9886e-08, 4.9482e-08, 3.9922e-08,\n 9.5956e-08, 5.0570e-08, 6.1218e-08, 9.3191e-08, 9.0299e-08, 8.1224e-08,\n 1.1252e-07, 2.9632e-08, 7.2095e-08, 6.0498e-08, 6.8623e-08, 7.6296e-08,\n 7.7786e-08, 4.0308e-08, 4.7654e-08, 9.5403e-17, 1.0485e-07, 1.1070e-13,\n 8.3619e-08, 1.1904e-07, 1.0562e-07, 9.8547e-08, 7.8979e-08, 8.2478e-08,\n 5.5903e-08, 7.6623e-08, 9.4655e-18, 7.3662e-08, 9.8840e-08, 3.4206e-08,\n 2.0878e-08, 2.8190e-08, 2.4850e-15, 7.7452e-17, 7.7373e-08, 1.0891e-07,\n 2.9511e-19, 5.1603e-08, 9.5170e-08, 8.8150e-08, 1.3838e-07, 3.1432e-17,\n 6.2060e-08, 5.1358e-08, 6.0919e-08, 6.0145e-08, 1.1722e-07, 9.4244e-08,\n 7.3871e-08, 3.8061e-08, 7.9082e-08, 1.0469e-07, 1.2891e-07, 7.3625e-08,\n 5.1829e-08, 6.6510e-08, 9.7652e-08, 3.6128e-08, 6.3134e-08, 5.2271e-08,\n 1.3483e-07, 5.2201e-08, 1.0239e-07, 7.1383e-08, 4.4497e-08, 1.1307e-07,\n 1.0221e-07, 3.7897e-08, 5.6012e-08, 5.6706e-08, 5.9359e-08, 2.7623e-18,\n 5.2476e-08, 7.0461e-08, 6.2976e-08, 1.0243e-07, 9.7744e-08, 8.0291e-08,\n 3.8427e-08, 2.9740e-16, 1.1178e-07, 6.9708e-08, 5.6606e-08, 3.8155e-08,\n 6.7163e-08, 2.3433e-08, 8.4967e-08, 5.9070e-08, 4.8178e-08, 8.6470e-08,\n 5.2347e-08, 5.7406e-08, 1.9019e-08, 5.0029e-08, 6.7334e-08, 4.9171e-08,\n 7.9316e-08, 4.8841e-08, 7.6190e-08, 4.0751e-08, 6.8152e-08, 8.8598e-08,\n 7.4547e-08, 9.5213e-08, 7.3341e-08, 5.5634e-08, 6.7202e-16, 2.5624e-07,\n 1.2536e-07, 5.2966e-18, 8.6347e-08, 6.6983e-08, 3.9320e-08, 6.7351e-08,\n 7.0542e-08, 3.8272e-08, 7.5532e-08, 9.7780e-08, 5.1083e-08, 7.5180e-08,\n 4.7287e-13, 1.2852e-07, 6.1607e-08, 8.5450e-08, 6.4089e-08, 4.2954e-08,\n 9.9830e-08, 9.8308e-08, 1.6192e-07, 8.5610e-08, 3.8651e-08, 6.2116e-08,\n 8.9842e-08, 1.1712e-07, 2.9288e-08, 3.8093e-08, 6.5419e-08, 8.4769e-08,\n 6.4265e-08, 8.2300e-08, 6.0006e-08, 6.2058e-08, 3.0789e-08, 5.0157e-08,\n 6.1652e-08, 8.4358e-08, 5.5289e-08, 2.2787e-08, 5.1760e-08, 5.3833e-08,\n 5.4221e-08, 6.0091e-08, 6.4715e-08, 5.5333e-08, 8.5540e-08, 9.4109e-08,\n 3.0016e-08, 5.3202e-08, 7.7217e-08, 6.7576e-08, 4.0285e-08, 6.2959e-08,\n 6.8019e-08, 2.1143e-08, 6.7270e-16, 1.0168e-22, 9.3126e-08, 5.7618e-08],\n device='cuda:0')" }, "4": { - "step": "tensor(7512.)", - "exp_avg": "tensor([ 5.6052e-45, 3.7580e-04, 5.6052e-45, ..., -2.6365e-04,\n -2.7972e-05, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.4824e-10, 4.8990e-07, 4.7533e-11, ..., 1.3344e-06, 1.2133e-06,\n 1.3355e-09], device='cuda:0')" + "step": "tensor(18780.)", + "exp_avg": "tensor([[ 2.7152e-06, -1.1160e-07, 5.1798e-06, ..., 5.6052e-45,\n -3.2382e-07, -1.6074e-06],\n [-4.7832e-06, -2.4380e-06, 9.2844e-06, ..., -5.6052e-45,\n 1.3716e-05, -1.7795e-06],\n [-1.3344e-06, -5.5367e-06, 1.3206e-05, ..., -5.6052e-45,\n -3.5029e-06, -3.5263e-06],\n ...,\n [ 2.2807e-07, -2.2843e-06, 1.4620e-06, ..., -5.6052e-45,\n 9.0326e-06, -1.8264e-07],\n [ 6.6750e-06, 3.2813e-06, 1.3889e-05, ..., -5.6052e-45,\n 1.2284e-05, -1.5279e-06],\n [ 1.6147e-06, -1.8643e-06, 1.8256e-05, ..., -5.6052e-45,\n 3.2340e-06, -6.4436e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.5223e-10, 1.4120e-10, 1.3901e-10, ..., 2.8500e-25, 3.6669e-10,\n 1.4199e-10],\n [5.7699e-10, 1.9286e-10, 3.3054e-10, ..., 1.4565e-23, 8.9184e-10,\n 6.1979e-10],\n [1.0160e-09, 2.6635e-10, 1.0657e-09, ..., 6.3524e-24, 6.7659e-10,\n 6.9815e-10],\n ...,\n [9.7485e-10, 2.9271e-10, 3.5913e-10, ..., 5.5576e-24, 4.2451e-10,\n 3.9015e-10],\n [1.0209e-09, 2.9706e-10, 6.1248e-10, ..., 8.2826e-26, 7.4784e-10,\n 8.1800e-10],\n [9.4904e-10, 2.1795e-10, 9.3648e-10, ..., 5.3081e-24, 5.0568e-10,\n 6.4686e-10]], device='cuda:0')" }, "5": { - "step": "tensor(7512.)", - "exp_avg": "tensor([[ 5.6052e-45, -7.0514e-07, -5.6052e-45, ..., -3.9847e-06,\n 2.9505e-06, 5.6052e-45],\n [ 5.6052e-45, -2.9139e-06, -5.6052e-45, ..., -1.0544e-06,\n 2.9733e-06, 5.6052e-45],\n [ 5.6052e-45, -2.6041e-06, 5.6052e-45, ..., 4.5052e-06,\n 4.2818e-07, -5.6052e-45],\n ...,\n [-5.6052e-45, 6.3490e-06, 5.6052e-45, ..., -1.0507e-05,\n -3.1109e-06, 5.6052e-45],\n [ 5.6052e-45, 7.7425e-06, 5.6052e-45, ..., -8.9401e-06,\n -6.8942e-06, 5.6052e-45],\n [-5.6052e-45, 2.5644e-06, 5.6052e-45, ..., 9.5593e-06,\n -3.1913e-06, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.8401e-15, 1.9471e-11, 8.1013e-15, ..., 1.2060e-10, 8.4974e-11,\n 1.1169e-12],\n [2.3990e-14, 6.0794e-11, 2.1553e-14, ..., 8.6197e-11, 8.0703e-11,\n 1.6499e-12],\n [1.6022e-14, 3.9397e-11, 1.3287e-13, ..., 2.2250e-10, 1.0603e-10,\n 2.9965e-12],\n ...,\n [3.8903e-15, 2.7162e-11, 3.2012e-13, ..., 2.4657e-10, 1.0317e-10,\n 5.0358e-12],\n [8.0342e-14, 1.9714e-10, 3.0357e-14, ..., 2.9656e-10, 1.8254e-10,\n 1.9363e-12],\n [2.4853e-15, 4.0097e-11, 3.0763e-14, ..., 3.0585e-10, 1.2834e-10,\n 2.5422e-12]], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([[ 3.0495e-11, 5.5635e-08, -3.5667e-09, ..., -3.2679e-06,\n 1.5753e-07, 3.6537e-08],\n [ 3.6551e-07, -1.7233e-07, 6.5017e-11, ..., -5.4078e-06,\n -2.3225e-07, 6.3039e-09],\n [-8.2973e-08, 2.3203e-08, 1.0780e-09, ..., 5.5507e-07,\n -2.6176e-07, 2.4158e-08],\n ...,\n [ 4.2235e-07, 8.6951e-07, 3.4547e-11, ..., 5.4313e-07,\n -6.0603e-07, 8.3649e-07],\n [ 3.2226e-09, 2.6800e-07, -3.1045e-16, ..., -3.0056e-06,\n 2.1099e-06, 1.3310e-09],\n [ 1.0958e-07, -3.9206e-07, 1.0235e-11, ..., 5.1554e-07,\n -2.3998e-07, 1.6730e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.7129e-13, 5.4434e-12, 1.2102e-12, ..., 3.5673e-11, 6.7137e-12,\n 1.3818e-12],\n [5.1089e-11, 2.8227e-11, 8.5587e-15, ..., 1.1368e-10, 3.1019e-11,\n 2.2906e-12],\n [1.9500e-12, 4.1384e-12, 4.1212e-13, ..., 4.2542e-12, 1.5643e-11,\n 1.1425e-12],\n ...,\n [2.6992e-10, 2.9454e-11, 2.8103e-12, ..., 6.5465e-12, 1.8181e-11,\n 1.2031e-09],\n [1.6738e-12, 2.2312e-11, 3.1590e-15, ..., 5.0788e-11, 4.8773e-11,\n 6.7045e-12],\n [1.8092e-11, 1.0922e-11, 2.4893e-11, ..., 5.0600e-11, 2.9658e-11,\n 1.3599e-10]], device='cuda:0')" }, - "15": { - "step": "tensor(15024.)", + "6": { + "step": "tensor(17528.)", + "exp_avg": "tensor([ 1.5804e-05, -5.2604e-06, -2.9656e-06, ..., -1.0888e-06,\n 2.6365e-05, 1.0766e-05], device='cuda:0')", + "exp_avg_sq": "tensor([8.8234e-10, 2.4309e-09, 8.8992e-10, ..., 3.0139e-09, 1.5642e-09,\n 1.8014e-09], device='cuda:0')" + }, + "7": { + "step": "tensor(17528.)", + "exp_avg": "tensor([[ 3.4129e-07, 9.1339e-08, -1.1961e-06, ..., 1.5952e-07,\n -4.1711e-08, 1.3631e-07],\n [-7.8759e-08, -5.4807e-07, -6.2696e-07, ..., 3.4834e-07,\n 1.4681e-07, -3.2027e-08],\n [-1.7506e-07, -7.7807e-08, 3.7035e-07, ..., -1.5376e-07,\n -8.5742e-08, -9.3733e-08],\n ...,\n [ 2.9398e-08, 2.6790e-07, -1.4498e-07, ..., -9.0748e-08,\n -2.2458e-07, 2.0909e-08],\n [ 9.9714e-08, -2.2231e-07, 1.3567e-06, ..., 5.5586e-07,\n -9.9696e-08, -2.5482e-09],\n [ 2.9960e-07, -2.7523e-07, 1.4012e-06, ..., -3.1880e-07,\n 1.8966e-07, 1.4796e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.0173e-13, 1.2740e-12, 6.3058e-12, ..., 1.0984e-12, 8.5481e-13,\n 1.4636e-12],\n [9.2696e-13, 3.8880e-12, 4.5582e-12, ..., 1.7763e-12, 1.7126e-12,\n 1.5741e-12],\n [1.0344e-12, 3.0895e-12, 2.0857e-12, ..., 1.7091e-12, 8.8887e-13,\n 2.3281e-12],\n ...,\n [4.7425e-13, 2.5308e-12, 2.7347e-12, ..., 2.7204e-12, 1.1579e-12,\n 2.8466e-12],\n [6.1335e-13, 4.3896e-12, 1.1487e-11, ..., 2.7224e-12, 9.0095e-13,\n 2.9537e-12],\n [8.9404e-13, 2.0975e-12, 2.5095e-11, ..., 1.8646e-12, 1.7881e-12,\n 1.9283e-12]], device='cuda:0')" + }, + "14": { + "step": "tensor(17528.)", "exp_avg": "tensor([5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([4.7637e-11], device='cuda:0')" + "exp_avg_sq": "tensor([6.3850e-13], device='cuda:0')" }, - "16": { - "step": "tensor(15024.)", + "15": { + "step": "tensor(17528.)", "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.3022e-14, 3.0411e-13, 1.1168e-13], device='cuda:0')" + "exp_avg_sq": "tensor([8.2372e-16, 4.6629e-14, 3.5058e-14], device='cuda:0')" }, - "17": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([3.7926e-10, 2.6781e-11, 1.5717e-11, 3.0072e-11, 2.3660e-11],\n device='cuda:0')" + "16": { + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.9602e-11, 5.1110e-12, 5.5302e-12, 5.9074e-12], device='cuda:0')" + }, + "18": { + "step": "tensor(17528.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.2884e-17, 3.3013e-17, 0.0000e+00, ..., 2.1783e-16, 7.8344e-17,\n 2.5146e-18],\n [1.1283e-17, 2.9004e-17, 0.0000e+00, ..., 2.6665e-17, 9.3763e-17,\n 2.6425e-17],\n [3.3910e-18, 1.1967e-17, 0.0000e+00, ..., 3.9992e-18, 2.5927e-17,\n 8.4913e-18],\n ...,\n [4.2967e-19, 2.3109e-18, 0.0000e+00, ..., 3.2956e-18, 4.5638e-17,\n 2.5711e-19],\n [8.2763e-17, 8.7008e-17, 0.0000e+00, ..., 1.0531e-16, 4.4140e-16,\n 2.6490e-17],\n [2.5852e-19, 2.0091e-19, 0.0000e+00, ..., 3.9765e-19, 3.9443e-18,\n 9.3422e-19]], device='cuda:0')" }, "19": { - "step": "tensor(15024.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.9593e-19, 1.4236e-18, 2.5371e-19, ..., 5.7390e-20, 6.7952e-19,\n 3.6636e-19],\n [1.5962e-16, 1.6036e-16, 1.2272e-19, ..., 2.9519e-17, 5.9032e-18,\n 1.2578e-17],\n [1.6646e-15, 1.9058e-15, 1.8942e-19, ..., 1.5367e-16, 2.0798e-16,\n 6.3127e-17],\n ...,\n [5.0008e-17, 2.2544e-17, 4.3183e-18, ..., 1.4376e-18, 1.3348e-17,\n 1.6421e-18],\n [6.3261e-18, 5.1634e-18, 1.3171e-19, ..., 1.1792e-19, 6.5490e-19,\n 7.5843e-20],\n [5.7169e-15, 6.6950e-15, 1.0207e-18, ..., 5.1628e-16, 7.3375e-16,\n 2.5059e-16]], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.4813e-14, 3.4111e-14, 7.7940e-15, 1.4149e-14, 5.1477e-15, 4.9883e-17,\n 8.8568e-16, 1.4048e-15, 3.5902e-15, 2.0324e-16, 1.1386e-14, 5.7903e-14,\n 2.3860e-14, 2.8903e-14, 4.3636e-15, 2.7213e-14, 6.4861e-14, 5.3764e-15,\n 3.5961e-14, 5.8908e-14, 7.0318e-16, 2.4750e-14, 5.3506e-15, 5.2903e-14,\n 1.0703e-14, 2.1660e-14, 9.6536e-15, 6.0051e-15, 1.6352e-15, 1.0130e-14,\n 3.0306e-14, 1.5188e-14, 5.5187e-16, 1.3931e-13, 7.2234e-14, 3.5119e-14,\n 3.6415e-15, 4.0159e-16, 2.7602e-14, 2.6900e-14, 1.3198e-13, 3.2645e-15,\n 5.8255e-16, 1.8448e-15, 4.6573e-15, 3.3837e-14, 1.4549e-14, 9.4839e-15,\n 9.2284e-15, 1.0818e-14, 4.4255e-16, 1.5578e-14, 3.9567e-14, 2.4139e-14,\n 5.1188e-15, 2.6693e-15, 1.8191e-14, 2.3172e-16, 6.9971e-15, 2.5157e-15,\n 7.8361e-15, 1.7312e-14, 1.1165e-14, 6.4708e-15, 6.3419e-16, 4.1299e-15,\n 1.5018e-14, 8.8342e-16, 1.8929e-14, 1.7817e-14, 1.7041e-16, 5.7520e-15,\n 1.5216e-14, 6.9171e-16, 6.1046e-15, 5.2301e-17, 4.0474e-15, 1.0335e-13,\n 2.0549e-15, 1.8993e-16, 3.1683e-14, 4.2407e-14, 6.9499e-18, 2.3648e-16,\n 7.5546e-15, 2.6240e-14, 7.8951e-16, 1.3429e-14, 1.7216e-15, 2.9860e-14,\n 4.7416e-15, 4.3309e-14, 5.5203e-14, 3.9062e-15, 5.4690e-15, 2.1771e-14,\n 1.4352e-15, 8.5587e-14, 2.8549e-14, 1.0226e-13, 2.9185e-14, 2.0222e-15,\n 4.0354e-16, 1.2349e-14, 1.0263e-15, 1.0195e-13, 2.5590e-15, 7.2506e-15,\n 1.6337e-14, 1.2605e-16, 1.3153e-13, 2.4366e-15, 3.9118e-14, 1.6613e-14,\n 5.6356e-15, 7.7523e-14, 1.9315e-15, 9.3974e-15, 2.5947e-14, 1.2414e-14,\n 1.3542e-14, 2.5058e-14, 2.4883e-15, 4.1828e-16, 1.6311e-16, 9.1119e-15,\n 2.1731e-13, 1.6944e-15, 1.4263e-14, 2.8288e-15, 3.8067e-16, 6.9882e-14,\n 5.8732e-15, 9.8366e-15, 3.9810e-14, 2.8427e-14, 2.4026e-14, 7.3660e-14,\n 3.1762e-14, 6.9876e-18, 2.8973e-16, 4.7008e-16, 2.5579e-14, 1.0497e-13,\n 5.8793e-14, 3.8627e-15, 3.9508e-15, 5.1001e-15, 3.1463e-14, 7.1249e-17,\n 1.6305e-15, 3.2755e-14, 1.1308e-14, 1.2593e-14, 2.8173e-14, 9.6921e-16,\n 8.8494e-15, 2.3250e-14, 4.5170e-15, 7.5309e-14, 3.5708e-14, 5.8595e-15,\n 1.3899e-14, 3.7737e-14, 2.3856e-15, 1.7068e-14, 1.6802e-13, 2.5530e-14,\n 1.7640e-14, 8.1341e-15, 1.1765e-15, 2.8064e-14, 1.1472e-13, 2.8855e-14,\n 3.7920e-14, 2.7622e-16, 3.8234e-15, 1.3202e-14, 1.5045e-14, 1.6130e-14,\n 1.9795e-14, 9.2426e-15, 1.8559e-15, 1.1144e-14, 2.1212e-14, 5.8861e-17,\n 5.5558e-15, 2.2818e-14, 5.7877e-15, 9.1978e-14, 6.8755e-14, 7.1305e-15,\n 4.7202e-15, 2.0446e-15, 1.3645e-17, 3.2948e-13, 3.8612e-15, 6.6480e-15,\n 7.3399e-15, 1.6093e-13, 1.6033e-18, 1.5028e-13, 2.8268e-14, 4.3009e-15,\n 2.7458e-14, 3.3739e-13, 3.1020e-14, 9.6810e-15, 1.4050e-16, 4.3547e-14,\n 3.5159e-15, 6.3551e-14, 9.8951e-15, 2.0162e-14, 1.5434e-16, 2.5673e-14,\n 1.9742e-13, 1.1176e-16, 2.1760e-15, 6.3821e-15, 4.6985e-15, 1.3134e-13,\n 1.6541e-14, 9.4438e-14, 1.3092e-14, 1.5898e-17, 3.3730e-16, 1.9323e-15,\n 7.1618e-16, 2.7573e-14, 2.3574e-16, 2.5197e-16, 4.7862e-14, 1.8718e-14,\n 1.4598e-14, 3.2525e-15, 3.8933e-15, 2.9470e-14, 3.6896e-15, 3.2421e-15,\n 3.7611e-14, 1.5307e-16, 8.2805e-15, 5.0774e-14, 8.8508e-15, 2.5355e-16,\n 4.7384e-16, 2.1307e-14, 2.4056e-15, 7.6634e-14, 3.9953e-14, 4.0144e-15,\n 3.4511e-14, 4.0748e-15, 1.1526e-13, 2.0379e-15], device='cuda:0')" }, "20": { - "step": "tensor(15024.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.2852e-16, 1.4870e-13, 1.0345e-12, 2.6084e-14, 1.7780e-13, 6.9083e-13,\n 7.8153e-15, 1.0620e-12, 8.2967e-14, 1.9280e-15, 4.7902e-12, 1.5264e-14,\n 4.8336e-15, 1.0972e-13, 1.1341e-13, 8.9920e-13, 3.0347e-13, 3.0209e-14,\n 1.5040e-12, 4.5173e-13, 4.5234e-13, 4.3493e-14, 1.5496e-13, 3.1565e-13,\n 1.8825e-13, 3.0590e-14, 1.3971e-14, 2.0875e-12, 1.4028e-15, 4.4135e-14,\n 8.5047e-14, 4.1734e-13, 6.9142e-13, 2.6157e-14, 3.6682e-14, 9.9097e-14,\n 1.9437e-15, 1.2544e-13, 7.5545e-13, 3.9805e-15, 1.6060e-14, 5.1083e-14,\n 1.3700e-12, 2.3431e-13, 1.6197e-14, 1.2688e-15, 5.6227e-15, 4.4955e-14,\n 1.1039e-14, 2.4519e-16, 8.5535e-16, 9.9280e-15, 5.3073e-14, 2.8631e-13,\n 3.7505e-15, 4.2491e-13, 7.4798e-13, 3.3622e-12, 3.8276e-13, 7.1881e-16,\n 5.4848e-12, 1.8573e-16, 1.8386e-14, 2.8402e-13, 8.3258e-13, 1.2221e-12,\n 1.9899e-14, 2.1012e-14, 4.4149e-12, 4.1275e-14, 6.3766e-13, 1.7524e-13,\n 3.2749e-13, 5.7018e-14, 8.6160e-16, 8.2086e-14, 5.9409e-13, 9.6703e-15,\n 8.3201e-18, 3.3436e-16, 8.7657e-16, 1.3055e-14, 1.3779e-12, 5.8029e-13,\n 8.9784e-14, 2.1658e-14, 4.6953e-13, 8.0488e-14, 1.2439e-13, 3.1072e-12,\n 1.7847e-14, 1.1631e-13, 2.1437e-14, 8.8924e-13, 1.3960e-13, 2.5248e-14,\n 1.1740e-16, 3.4896e-14, 1.1273e-12, 2.2392e-13, 6.1005e-16, 8.2787e-13,\n 6.6716e-15, 4.1575e-16, 3.5237e-13, 6.2242e-14, 1.1435e-15, 1.5037e-14,\n 4.6763e-14, 6.1265e-14, 5.8608e-16, 1.2578e-14, 4.9608e-15, 1.4142e-15,\n 7.1572e-14, 3.0275e-14, 8.8532e-13, 2.4495e-14, 5.3540e-13, 2.3444e-13,\n 1.1281e-14, 2.0124e-13, 3.0425e-13, 2.3596e-14, 3.9966e-13, 4.5547e-14,\n 1.7711e-12, 3.0188e-14, 6.9894e-14, 4.8265e-15, 2.0628e-15, 3.2342e-14,\n 1.1373e-14, 1.5020e-13, 2.9737e-14, 4.9428e-13, 1.7884e-15, 1.1466e-14,\n 3.1232e-13, 4.4179e-14, 9.3764e-16, 2.5637e-15, 4.6150e-12, 1.1760e-11,\n 2.8617e-13, 1.2531e-15, 3.7629e-13, 3.2005e-13, 2.4836e-13, 6.6105e-15,\n 1.2444e-15, 2.2342e-13, 8.1537e-13, 2.3776e-13, 4.0200e-13, 1.2056e-13,\n 2.3142e-15, 1.1580e-14, 1.7957e-14, 1.2316e-12, 6.2970e-15, 1.8455e-13,\n 2.3144e-13, 5.8773e-15, 5.9625e-15, 8.4088e-12, 7.7319e-13, 1.8959e-15,\n 2.3901e-12, 1.7376e-16, 2.2403e-13, 1.2074e-14, 2.9702e-13, 6.4112e-14,\n 1.1914e-13, 1.6493e-12, 2.7837e-13, 1.3062e-13, 2.8430e-14, 7.4335e-13,\n 1.1442e-13, 2.7462e-13, 6.9450e-12, 8.8684e-14, 4.4682e-15, 6.9726e-17,\n 1.1557e-14, 1.0675e-12, 5.5199e-14, 2.3333e-13, 1.1707e-12, 4.4623e-13,\n 9.3514e-15, 5.3146e-15, 8.5885e-13, 1.4939e-13, 1.5432e-15, 3.3443e-13,\n 2.4661e-15, 2.8599e-13, 6.3202e-13, 3.5459e-14, 1.5618e-13, 4.4135e-15,\n 7.3416e-14, 2.2626e-12, 3.5088e-14, 2.5662e-13, 7.9283e-16, 1.9430e-12,\n 3.6828e-13, 2.2649e-15, 1.7730e-14, 1.4583e-15, 1.3659e-12, 1.3727e-14,\n 9.7334e-14, 5.7784e-13, 2.9989e-14, 2.5577e-13, 2.0574e-13, 1.5920e-13,\n 9.2272e-15, 5.2066e-13, 3.9374e-14, 1.8241e-13, 2.1233e-12, 1.9069e-14,\n 8.2183e-16, 6.6821e-15, 8.0622e-14, 1.1741e-13, 4.5968e-15, 1.5268e-13,\n 6.3773e-14, 7.3264e-13, 8.8431e-14, 5.1026e-13, 5.4036e-14, 8.9923e-15,\n 3.2418e-14, 5.4481e-15, 1.4752e-12, 9.7676e-14, 1.3632e-14, 2.9044e-13,\n 1.0573e-14, 3.0510e-14, 9.5962e-13, 2.6209e-14, 3.3157e-13, 1.8924e-15,\n 2.1250e-13, 1.4462e-14, 1.8296e-15, 3.5154e-12], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.8457e-16, 1.6879e-16, 1.1351e-17, 7.4072e-17, 8.6899e-18, 7.1669e-19,\n 1.5759e-18, 7.3179e-18, 4.3454e-18, 2.5527e-19, 2.0973e-17, 1.0503e-16,\n 6.0468e-17, 1.3602e-16, 1.4002e-17, 6.4098e-17, 2.1023e-16, 9.3249e-18,\n 1.1632e-16, 1.6760e-16, 3.4733e-19, 1.0410e-16, 1.1771e-17, 2.1591e-16,\n 4.1967e-17, 4.2040e-17, 2.4414e-17, 1.9953e-17, 5.9343e-18, 1.7930e-17,\n 9.2553e-17, 4.5061e-17, 3.5147e-18, 3.7507e-16, 2.7988e-16, 8.7583e-17,\n 7.2296e-18, 7.3166e-21, 5.7355e-17, 4.8629e-17, 5.5825e-16, 7.7875e-18,\n 2.4784e-18, 8.0936e-18, 9.3045e-18, 7.6116e-17, 3.1691e-17, 2.0007e-17,\n 1.4114e-17, 1.6715e-17, 3.4675e-19, 4.2564e-17, 8.2147e-17, 5.2620e-17,\n 7.0118e-18, 1.2588e-17, 5.8850e-17, 4.8482e-19, 2.3093e-17, 7.1255e-18,\n 1.4148e-17, 1.2915e-16, 3.1316e-17, 1.5542e-17, 9.5972e-21, 6.5979e-18,\n 3.2790e-17, 4.7963e-19, 3.4951e-17, 5.7685e-17, 4.0100e-18, 1.3216e-17,\n 1.6894e-17, 2.3224e-18, 1.1165e-17, 3.3801e-18, 2.3600e-17, 5.2478e-16,\n 1.7986e-18, 5.3672e-22, 1.3271e-16, 1.0995e-16, 1.0104e-18, 1.1682e-18,\n 1.4860e-17, 5.8627e-17, 3.1463e-18, 6.2868e-17, 5.0456e-18, 5.5678e-17,\n 1.4362e-17, 9.8963e-17, 2.0067e-16, 4.9528e-18, 1.5131e-17, 7.6733e-17,\n 3.1077e-18, 1.8230e-16, 1.1111e-16, 2.8754e-16, 8.7469e-17, 9.4142e-18,\n 4.9347e-18, 2.1888e-17, 4.2812e-19, 2.3957e-16, 2.6750e-18, 1.7093e-17,\n 3.4017e-17, 1.7814e-19, 3.3570e-16, 7.5951e-18, 1.1086e-16, 3.4883e-17,\n 1.0283e-17, 3.1290e-16, 7.6096e-18, 2.0037e-17, 7.8233e-17, 3.0115e-17,\n 2.4917e-17, 5.2188e-17, 2.0912e-18, 4.0361e-20, 1.6646e-18, 1.4022e-17,\n 9.8262e-16, 6.4259e-18, 4.5556e-17, 4.3145e-18, 1.3949e-19, 2.0841e-16,\n 1.4372e-17, 2.9266e-17, 7.6419e-17, 6.4197e-17, 6.7482e-17, 1.9011e-16,\n 8.6890e-17, 1.1788e-18, 7.4296e-18, 2.6936e-18, 5.6205e-17, 3.0855e-16,\n 3.6943e-16, 1.5291e-17, 1.5871e-17, 1.1938e-17, 6.5176e-17, 1.8070e-20,\n 1.6321e-17, 7.1823e-17, 3.6508e-17, 1.4336e-17, 6.6092e-17, 5.8718e-18,\n 1.3060e-17, 3.9612e-17, 7.4306e-18, 2.9786e-16, 6.9632e-17, 6.6313e-18,\n 3.2381e-17, 1.2580e-16, 6.9325e-18, 2.2182e-17, 5.7758e-16, 4.8419e-17,\n 5.3979e-17, 2.0729e-17, 6.0292e-19, 1.1194e-16, 2.7634e-16, 1.0769e-16,\n 7.1380e-17, 1.0626e-18, 5.9750e-18, 5.9053e-17, 4.6084e-17, 4.2125e-17,\n 5.1119e-17, 1.7887e-17, 2.2537e-18, 1.6732e-17, 5.3101e-17, 3.1742e-23,\n 8.5588e-18, 5.0761e-17, 5.0971e-18, 2.3031e-16, 1.7945e-16, 8.9812e-18,\n 1.4038e-17, 1.1186e-17, 1.2862e-18, 1.2772e-15, 4.9156e-18, 1.0645e-17,\n 2.2214e-17, 4.4228e-16, 3.8102e-19, 4.0378e-16, 5.1010e-17, 5.8750e-18,\n 9.1336e-17, 8.5988e-16, 6.7191e-17, 2.4430e-17, 4.6056e-20, 6.8049e-17,\n 4.8159e-18, 1.5841e-16, 6.7189e-17, 6.1528e-17, 9.2236e-20, 4.4585e-17,\n 9.3926e-16, 3.8319e-20, 8.1695e-18, 7.6450e-18, 6.0013e-18, 5.7772e-16,\n 9.2040e-17, 2.2982e-16, 3.8706e-17, 3.3825e-19, 1.6395e-20, 5.7295e-18,\n 7.0935e-19, 7.7530e-17, 1.9054e-18, 1.7745e-19, 2.8233e-16, 8.2653e-17,\n 4.4181e-17, 7.5766e-18, 4.9798e-18, 8.5714e-17, 1.0126e-17, 3.2370e-18,\n 9.3128e-17, 3.5657e-20, 2.4845e-17, 1.3077e-16, 1.3422e-17, 4.3467e-20,\n 1.1161e-19, 4.0758e-17, 3.7942e-18, 2.0681e-16, 8.1182e-17, 5.6424e-18,\n 7.7301e-17, 8.0641e-18, 3.6911e-16, 1.0732e-17], device='cuda:0')" }, "21": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.7183e-17, 1.7029e-17, 2.3683e-15, 1.4782e-17, 7.2735e-17, 8.3228e-17,\n 3.9451e-17, 2.7604e-16, 1.7755e-18, 9.5239e-18, 2.9572e-15, 5.8524e-18,\n 8.6232e-19, 4.0574e-17, 2.2219e-16, 7.6175e-16, 3.2770e-16, 2.3491e-18,\n 5.5844e-15, 2.8015e-15, 2.5245e-15, 3.3190e-18, 5.3456e-18, 1.9342e-16,\n 1.5970e-15, 2.2805e-17, 3.6399e-17, 2.0136e-15, 6.6121e-19, 5.0417e-17,\n 1.7127e-17, 3.2819e-16, 2.4038e-15, 6.4426e-18, 3.0315e-17, 7.4479e-16,\n 1.0674e-17, 6.0244e-17, 1.4783e-15, 9.0574e-20, 7.2153e-18, 3.1298e-18,\n 1.5223e-15, 8.4081e-17, 1.2973e-17, 1.0607e-18, 7.5983e-18, 6.2270e-18,\n 3.6324e-18, 1.1132e-17, 1.1259e-18, 1.2354e-17, 8.4135e-17, 6.5227e-17,\n 5.5974e-20, 7.6051e-17, 4.2382e-17, 2.5628e-15, 1.8813e-16, 2.0717e-18,\n 1.1963e-14, 7.6322e-20, 5.3417e-19, 3.5094e-17, 1.6268e-15, 9.9242e-16,\n 1.3312e-17, 2.1474e-17, 1.5581e-14, 5.8302e-18, 1.1613e-15, 3.6724e-18,\n 4.5309e-17, 3.2241e-16, 3.5417e-18, 2.7060e-17, 6.0512e-16, 4.7754e-17,\n 1.0214e-17, 9.0701e-20, 4.7211e-18, 3.0677e-17, 3.4211e-16, 1.7022e-15,\n 8.4345e-17, 1.1763e-16, 6.3430e-17, 4.9008e-18, 3.0639e-17, 8.4722e-15,\n 4.1328e-17, 3.6535e-17, 1.0518e-17, 1.0752e-15, 1.9484e-17, 7.0543e-17,\n 2.1430e-19, 9.7204e-17, 1.9602e-15, 3.9637e-16, 1.4105e-18, 2.2016e-15,\n 1.0358e-18, 3.0380e-18, 9.9415e-17, 1.0879e-16, 4.2913e-18, 1.5014e-17,\n 1.0434e-17, 8.8674e-18, 1.4561e-19, 1.6204e-17, 2.7067e-17, 3.8412e-18,\n 1.7512e-17, 1.9848e-18, 2.3405e-15, 5.7590e-17, 7.3689e-16, 3.3183e-17,\n 2.9613e-17, 1.9836e-16, 2.5042e-16, 1.6615e-17, 8.1596e-16, 3.6500e-19,\n 1.8369e-15, 4.4714e-19, 2.1080e-17, 1.6911e-18, 1.5832e-18, 1.7706e-18,\n 2.4198e-18, 5.9693e-17, 6.2149e-18, 1.2754e-15, 2.9137e-18, 1.5116e-18,\n 1.8963e-16, 6.6576e-18, 2.8171e-18, 1.1681e-17, 2.0396e-14, 2.3151e-14,\n 4.4373e-17, 5.2834e-20, 4.0643e-16, 3.4873e-16, 5.8615e-16, 3.0040e-19,\n 2.3129e-18, 4.4909e-16, 8.6061e-16, 2.4447e-17, 7.5567e-17, 2.2235e-16,\n 1.7480e-20, 3.7707e-17, 8.3780e-18, 8.4851e-17, 1.1338e-18, 3.0248e-16,\n 1.1505e-17, 3.4806e-17, 1.1042e-17, 1.9670e-14, 4.5333e-16, 1.1832e-18,\n 3.1328e-15, 2.2478e-20, 2.4645e-17, 1.8756e-17, 2.0028e-15, 8.5454e-17,\n 5.2559e-17, 9.6685e-16, 8.9453e-17, 6.3869e-18, 4.1098e-17, 8.4217e-16,\n 2.1952e-18, 7.9697e-17, 1.8184e-14, 3.0136e-17, 1.6340e-18, 1.3775e-18,\n 5.2388e-19, 8.5628e-16, 1.2532e-16, 2.3264e-16, 7.3040e-16, 5.9040e-17,\n 3.6388e-18, 4.4981e-19, 5.0370e-16, 2.9848e-18, 2.4479e-19, 1.9687e-17,\n 3.5785e-18, 9.2642e-17, 1.6817e-16, 3.0776e-18, 1.6336e-17, 3.8349e-18,\n 4.9344e-18, 5.3583e-15, 3.7030e-17, 2.2443e-17, 1.2471e-17, 3.1365e-15,\n 1.1639e-16, 1.5947e-18, 1.3356e-18, 2.0717e-17, 1.3356e-16, 3.9489e-19,\n 3.4349e-18, 9.6538e-16, 1.4035e-16, 4.0865e-17, 3.7787e-16, 7.5028e-16,\n 1.1907e-18, 4.2052e-16, 2.1215e-17, 6.0979e-17, 4.2810e-16, 7.5713e-18,\n 2.4321e-19, 6.5392e-19, 9.6426e-18, 2.0801e-16, 6.8229e-19, 3.0094e-16,\n 7.3208e-18, 8.5659e-16, 8.9358e-16, 3.3798e-16, 5.4092e-17, 1.9272e-19,\n 5.0593e-18, 1.0004e-18, 5.4559e-16, 2.2031e-17, 5.1468e-18, 1.4078e-16,\n 1.2290e-18, 1.0008e-18, 5.0629e-15, 2.3820e-17, 3.5157e-17, 3.0961e-18,\n 1.5292e-16, 2.4838e-17, 3.2895e-18, 9.8331e-15], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.1657e-16, 1.6159e-16, 2.2247e-17, 7.2719e-17, 1.1435e-17, 4.8576e-19,\n 2.2032e-18, 1.1967e-17, 7.3042e-18, 1.4314e-19, 3.4765e-17, 1.9622e-16,\n 6.9202e-17, 1.3485e-16, 2.3566e-17, 1.2431e-16, 2.8113e-16, 1.4864e-17,\n 1.0331e-16, 2.4393e-16, 3.9268e-19, 1.1656e-16, 2.6980e-17, 2.2775e-16,\n 5.6068e-17, 6.4924e-17, 4.4958e-17, 3.1546e-17, 1.2220e-17, 2.6138e-17,\n 1.3591e-16, 7.0712e-17, 5.4959e-18, 4.7948e-16, 3.0892e-16, 1.5087e-16,\n 5.2244e-18, 1.5186e-21, 1.2182e-16, 9.2523e-17, 4.2934e-16, 1.8115e-17,\n 5.9776e-18, 1.4298e-17, 1.5952e-17, 1.0877e-16, 6.7185e-17, 4.6829e-17,\n 2.3189e-17, 3.4903e-17, 5.6668e-19, 6.8873e-17, 1.2897e-16, 7.5655e-17,\n 1.2764e-17, 1.7268e-17, 8.4077e-17, 4.3715e-19, 3.8989e-17, 1.4974e-17,\n 1.8751e-17, 9.0955e-17, 5.1077e-17, 2.9601e-17, 1.5768e-19, 1.0322e-17,\n 7.1000e-17, 1.2961e-18, 8.4159e-17, 8.6345e-17, 4.4083e-18, 2.9197e-17,\n 5.2889e-17, 5.0118e-18, 1.4516e-17, 3.0979e-18, 2.5135e-17, 4.5674e-16,\n 2.7105e-18, 4.3073e-22, 1.5305e-16, 1.9096e-16, 1.1131e-18, 2.5273e-18,\n 3.8511e-17, 1.2219e-16, 5.5562e-18, 7.1101e-17, 5.5829e-18, 1.3140e-16,\n 2.4821e-17, 1.8640e-16, 1.7778e-16, 1.0360e-17, 2.4611e-17, 5.8801e-17,\n 4.0759e-18, 2.9225e-16, 1.3387e-16, 4.2829e-16, 1.2865e-16, 1.4052e-17,\n 5.6277e-18, 3.3663e-17, 6.3242e-19, 3.3647e-16, 4.5574e-18, 3.7943e-17,\n 4.5974e-17, 1.1821e-19, 4.4169e-16, 1.4827e-17, 1.7596e-16, 6.1208e-17,\n 1.3536e-17, 2.4489e-16, 1.0461e-17, 2.5341e-17, 1.1868e-16, 6.0017e-17,\n 3.8373e-17, 7.3915e-17, 3.8473e-18, 1.1130e-19, 3.0997e-18, 2.6139e-17,\n 8.7359e-16, 9.5094e-18, 7.2015e-17, 7.9656e-18, 1.6104e-19, 3.0265e-16,\n 2.9980e-17, 4.6601e-17, 1.2549e-16, 9.2772e-17, 1.0643e-16, 2.4842e-16,\n 1.4140e-16, 1.4377e-18, 5.6179e-18, 4.4060e-18, 8.1870e-17, 3.5911e-16,\n 2.7322e-16, 2.2893e-17, 2.2661e-17, 2.6655e-17, 1.0316e-16, 4.0343e-20,\n 1.4967e-17, 9.8544e-17, 5.7318e-17, 3.6122e-17, 7.8221e-17, 8.9274e-18,\n 2.2150e-17, 8.4292e-17, 1.1979e-17, 3.1920e-16, 1.1726e-16, 1.6304e-17,\n 3.2695e-17, 1.6971e-16, 1.2819e-17, 4.8749e-17, 5.9995e-16, 8.1376e-17,\n 8.0865e-17, 3.9735e-17, 1.6571e-18, 7.3083e-17, 3.9350e-16, 1.3677e-16,\n 1.1403e-16, 2.1709e-18, 8.8883e-18, 7.0577e-17, 3.4421e-17, 7.7114e-17,\n 5.2281e-17, 2.3094e-17, 3.7583e-18, 3.0445e-17, 8.8930e-17, 9.1252e-21,\n 1.7052e-17, 6.5112e-17, 1.5558e-17, 3.7758e-16, 2.2947e-16, 1.8827e-17,\n 2.5685e-17, 1.3495e-17, 1.6894e-18, 1.3519e-15, 7.8110e-18, 1.4354e-17,\n 3.5638e-17, 5.6708e-16, 9.5296e-19, 6.2568e-16, 9.0960e-17, 9.5485e-18,\n 1.2879e-16, 1.1665e-15, 9.2966e-17, 2.2754e-17, 1.1633e-19, 1.5139e-16,\n 5.8259e-18, 2.7407e-16, 5.6402e-17, 9.7399e-17, 1.1027e-19, 8.5636e-17,\n 6.6295e-16, 2.5832e-20, 1.3750e-17, 1.4242e-17, 8.7272e-18, 5.5076e-16,\n 8.5800e-17, 4.0179e-16, 6.7274e-17, 6.2277e-19, 1.1239e-20, 1.3018e-17,\n 1.8096e-18, 1.1852e-16, 3.8119e-18, 3.8328e-19, 2.1406e-16, 8.8486e-17,\n 6.7349e-17, 1.6286e-17, 1.0082e-17, 1.3221e-16, 2.1008e-17, 7.8597e-18,\n 1.6974e-16, 3.3697e-20, 4.0303e-17, 1.5887e-16, 2.5191e-17, 8.4027e-20,\n 1.0309e-19, 6.7869e-17, 3.7890e-18, 3.2995e-16, 1.2748e-16, 9.3515e-18,\n 1.0361e-16, 1.3682e-17, 3.8219e-16, 1.4955e-17], device='cuda:0')" }, "22": { - "step": "tensor(15024.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([7.7728e-19, 2.0855e-16, 1.6527e-15, 2.3198e-17, 2.5458e-16, 9.5596e-16,\n 1.1329e-17, 1.4186e-15, 1.2259e-16, 1.6428e-17, 5.7931e-15, 8.1074e-17,\n 2.0834e-17, 1.9072e-16, 2.3440e-16, 9.2636e-16, 4.7735e-16, 5.4128e-17,\n 1.5953e-15, 8.5365e-16, 7.5143e-16, 7.2990e-17, 1.6061e-16, 6.2229e-16,\n 5.9881e-16, 4.3333e-17, 1.2401e-17, 2.3665e-15, 2.6417e-19, 8.4665e-17,\n 3.6699e-17, 5.7575e-16, 1.1063e-15, 8.0002e-17, 6.4466e-17, 4.3123e-16,\n 5.6032e-18, 8.2750e-17, 1.1808e-15, 7.5655e-19, 6.3387e-17, 7.9912e-17,\n 2.0796e-15, 3.5145e-16, 8.9731e-17, 8.3548e-18, 2.1718e-18, 9.0777e-18,\n 2.1393e-17, 8.9560e-19, 5.1636e-19, 1.7220e-17, 1.0745e-16, 2.6012e-16,\n 1.6127e-19, 4.6125e-16, 1.0143e-15, 4.5420e-15, 3.9352e-16, 9.4888e-18,\n 7.3538e-15, 2.6096e-18, 1.0124e-17, 3.6286e-16, 1.2170e-15, 1.7171e-15,\n 4.0395e-17, 6.1529e-17, 6.0137e-15, 7.2757e-17, 9.1356e-16, 1.8141e-16,\n 4.1682e-16, 2.9556e-16, 2.0616e-18, 5.7230e-17, 8.7038e-16, 3.4320e-17,\n 2.2275e-19, 2.1937e-18, 3.1793e-17, 1.0844e-17, 1.8742e-15, 1.0549e-15,\n 1.5965e-16, 1.5406e-16, 5.1049e-16, 1.2458e-16, 2.0238e-16, 3.6837e-15,\n 1.1959e-16, 1.8653e-16, 2.4088e-18, 1.2564e-15, 2.0456e-16, 2.6562e-17,\n 9.6374e-18, 3.4975e-17, 1.6323e-15, 4.3542e-16, 8.0735e-18, 1.2199e-15,\n 1.4215e-17, 5.1709e-18, 3.7381e-16, 2.5528e-16, 2.6470e-18, 9.8451e-17,\n 7.3562e-17, 1.0000e-16, 3.3741e-18, 4.4554e-18, 4.8831e-17, 1.3771e-18,\n 9.8981e-17, 5.2518e-17, 1.3476e-15, 1.7236e-17, 7.7559e-16, 3.4016e-16,\n 9.4283e-18, 2.6004e-16, 4.6997e-16, 5.3881e-17, 7.3821e-16, 3.1776e-18,\n 1.9997e-15, 5.3448e-18, 1.1553e-16, 7.7414e-19, 4.6093e-18, 4.3655e-18,\n 3.7509e-18, 1.8986e-16, 4.4937e-17, 8.2942e-16, 1.4378e-17, 6.7252e-18,\n 4.6459e-16, 6.5560e-17, 2.4833e-18, 2.9516e-19, 5.7446e-15, 1.4817e-14,\n 2.7331e-16, 1.9758e-18, 6.1831e-16, 4.9458e-16, 4.0881e-16, 1.3334e-17,\n 1.2321e-17, 5.2495e-16, 8.5276e-16, 3.4685e-16, 5.7096e-16, 1.8737e-16,\n 7.7251e-21, 8.8900e-18, 3.1603e-17, 1.4904e-15, 4.2984e-19, 2.9876e-16,\n 3.3113e-16, 9.9370e-18, 5.0518e-18, 1.1128e-14, 1.1064e-15, 4.7073e-18,\n 2.8514e-15, 6.7464e-19, 2.2863e-16, 6.6491e-18, 6.6718e-16, 1.2866e-16,\n 2.0193e-16, 1.9461e-15, 2.3144e-16, 1.3290e-16, 1.0736e-17, 1.0709e-15,\n 1.2925e-16, 2.8864e-16, 9.2754e-15, 6.9884e-17, 1.2255e-17, 4.1283e-19,\n 2.2563e-17, 1.1512e-15, 2.3739e-16, 3.7270e-16, 1.6177e-15, 5.0556e-16,\n 1.6308e-17, 1.0779e-17, 9.0519e-16, 2.1001e-16, 2.1052e-19, 4.7185e-16,\n 1.5780e-17, 2.4811e-16, 6.5940e-16, 6.2104e-17, 1.5161e-16, 4.0782e-17,\n 7.1902e-17, 3.1521e-15, 3.0502e-17, 3.7030e-16, 1.4423e-18, 2.6540e-15,\n 3.8437e-16, 7.6943e-21, 1.4520e-18, 3.8028e-18, 1.8413e-15, 5.9694e-18,\n 1.4642e-16, 8.5087e-16, 4.4506e-17, 3.7820e-16, 4.1521e-16, 3.4318e-16,\n 1.9923e-17, 6.0163e-16, 1.1679e-16, 2.1048e-16, 2.8367e-15, 6.3539e-17,\n 2.4410e-18, 2.8859e-18, 1.0495e-16, 2.0768e-16, 7.6818e-19, 3.6231e-16,\n 2.4476e-17, 1.1287e-15, 4.0963e-16, 7.3942e-16, 2.1161e-16, 6.2402e-20,\n 4.9130e-17, 1.2506e-17, 1.7152e-15, 1.8476e-16, 5.5095e-17, 4.3705e-16,\n 1.8157e-17, 5.1497e-17, 1.6171e-15, 5.4078e-17, 4.7310e-16, 1.0912e-18,\n 1.8957e-16, 1.9677e-17, 1.1018e-18, 4.9685e-15], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.2368e-17, 8.4023e-17, 0.0000e+00, ..., 1.2235e-16, 2.0007e-16,\n 3.0611e-17],\n [1.0260e-17, 1.1451e-17, 0.0000e+00, ..., 1.8896e-17, 5.8287e-17,\n 3.7050e-20],\n [1.3775e-17, 7.4828e-18, 0.0000e+00, ..., 1.2584e-17, 2.6886e-17,\n 2.1652e-18],\n ...,\n [2.3528e-17, 2.9009e-17, 0.0000e+00, ..., 3.5829e-17, 6.7798e-17,\n 2.1567e-17],\n [7.1635e-17, 2.2745e-17, 0.0000e+00, ..., 2.2610e-17, 1.4347e-16,\n 1.3658e-17],\n [3.5064e-19, 4.9105e-19, 0.0000e+00, ..., 5.2950e-18, 8.4392e-18,\n 1.7225e-18]], device='cuda:0')" }, "23": { - "step": "tensor(15024.)", - "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.6597e-15, 3.8809e-15, 2.7928e-18, ..., 2.7360e-16, 4.9148e-16,\n 9.1046e-17],\n [2.8743e-15, 3.2533e-15, 2.0365e-20, ..., 1.8131e-16, 3.7957e-16,\n 7.8973e-17],\n [8.5812e-16, 1.0088e-15, 2.1220e-18, ..., 7.7261e-17, 9.5102e-17,\n 3.0261e-17],\n ...,\n [8.4557e-17, 5.3081e-17, 4.1269e-19, ..., 5.2408e-18, 1.4605e-17,\n 2.2112e-18],\n [2.8957e-16, 3.0803e-16, 3.9837e-19, ..., 2.9625e-17, 3.6062e-17,\n 1.4313e-17],\n [2.6047e-16, 3.7875e-16, 3.3349e-19, ..., 3.5783e-17, 3.3869e-17,\n 1.9950e-17]], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([8.7382e-14, 6.7257e-15, 1.4855e-14, 1.1179e-14, 3.8121e-15, 2.0448e-15,\n 8.7957e-15, 5.2331e-18, 3.5074e-15, 5.9489e-16, 6.6042e-14, 1.3077e-14,\n 1.7594e-14, 1.1415e-14, 3.3406e-15, 8.2443e-15, 3.4549e-14, 2.2600e-15,\n 6.5409e-15, 6.9671e-14, 2.7502e-16, 2.6451e-14, 1.1514e-14, 6.6520e-15,\n 8.9018e-16, 2.7744e-14, 8.7638e-15, 1.3934e-15, 9.7429e-16, 7.7602e-15,\n 3.8331e-14, 7.9813e-15, 9.7909e-16, 2.8045e-13, 2.7702e-14, 1.9411e-14,\n 3.3242e-15, 2.4091e-19, 2.5903e-14, 1.3064e-13, 1.6430e-14, 1.0029e-14,\n 1.7729e-15, 1.0199e-14, 5.9554e-14, 4.4187e-14, 2.4988e-14, 1.4621e-14,\n 3.6837e-15, 6.3456e-14, 7.1155e-16, 1.1470e-13, 7.9500e-14, 1.8131e-14,\n 1.8398e-15, 3.3240e-16, 4.3616e-14, 1.3867e-15, 1.9915e-14, 4.0155e-15,\n 9.3716e-15, 1.0747e-14, 5.7175e-14, 3.0347e-14, 1.3847e-15, 3.5827e-15,\n 2.1385e-14, 3.8137e-15, 2.9945e-14, 7.1995e-16, 1.0576e-18, 1.2286e-14,\n 2.2596e-14, 3.9693e-16, 9.5830e-15, 1.1711e-15, 3.3211e-15, 6.3739e-14,\n 9.3709e-16, 4.0058e-16, 5.1210e-15, 4.4911e-14, 1.2808e-18, 3.3284e-15,\n 9.3260e-16, 3.3831e-14, 1.3614e-14, 7.6286e-15, 1.3459e-14, 1.9714e-13,\n 2.6409e-15, 4.4450e-14, 5.6688e-14, 8.5734e-15, 1.0757e-13, 8.0372e-15,\n 9.5170e-15, 6.0424e-14, 2.0306e-14, 2.3678e-14, 5.2599e-14, 2.9613e-15,\n 5.7041e-16, 2.1251e-14, 7.8753e-16, 1.1343e-13, 6.3898e-15, 1.2934e-14,\n 3.9507e-15, 1.8269e-17, 9.2760e-14, 6.6373e-15, 9.2125e-14, 2.8122e-14,\n 3.0768e-15, 5.1179e-15, 6.0976e-15, 5.1818e-15, 3.9099e-15, 9.5026e-15,\n 2.8444e-14, 1.6634e-14, 2.8262e-15, 2.2544e-18, 1.6728e-17, 1.0236e-14,\n 5.6951e-14, 3.6927e-14, 2.9881e-15, 4.9615e-14, 3.8930e-17, 2.4519e-13,\n 1.1155e-14, 2.0736e-14, 3.5573e-14, 9.5511e-14, 3.0302e-14, 9.6287e-14,\n 2.6265e-14, 1.1982e-17, 3.4357e-16, 1.7939e-15, 1.1531e-14, 1.0383e-13,\n 1.8104e-14, 4.9024e-15, 4.3382e-15, 1.6814e-15, 1.0321e-13, 2.2161e-15,\n 1.8099e-15, 8.6214e-15, 9.4205e-15, 3.7495e-14, 1.2782e-14, 1.7119e-15,\n 9.0245e-15, 9.5845e-14, 2.7097e-15, 1.3196e-13, 3.3540e-14, 4.8758e-14,\n 5.0900e-15, 4.4369e-14, 3.3938e-14, 2.7963e-14, 2.6943e-13, 3.7903e-14,\n 4.1685e-14, 5.3910e-15, 1.7196e-15, 7.2511e-15, 4.9971e-14, 1.5373e-14,\n 2.8671e-14, 3.0708e-16, 2.5258e-15, 1.6848e-15, 3.4202e-15, 1.2041e-14,\n 1.7687e-15, 9.7568e-15, 6.4389e-15, 8.6301e-15, 6.6582e-14, 2.2630e-16,\n 6.7440e-15, 2.0649e-14, 3.9656e-15, 1.0704e-13, 1.1286e-14, 1.6125e-14,\n 1.7415e-14, 1.3761e-15, 5.8545e-18, 3.0624e-13, 3.6406e-15, 3.7741e-15,\n 2.2530e-14, 5.8663e-14, 2.3784e-18, 1.6654e-14, 6.3110e-14, 7.4670e-16,\n 3.7931e-14, 1.2625e-13, 1.9894e-14, 4.0864e-15, 1.5263e-17, 1.7254e-13,\n 3.5469e-15, 5.9174e-14, 1.8183e-15, 8.1302e-15, 1.3757e-15, 1.6411e-14,\n 2.1696e-14, 7.4605e-17, 1.6509e-15, 2.1370e-15, 7.1369e-15, 2.7729e-14,\n 5.1372e-15, 9.8307e-14, 1.7328e-14, 7.0787e-16, 2.1875e-16, 1.0566e-15,\n 3.9270e-15, 8.9984e-15, 2.7305e-16, 9.5890e-16, 1.5678e-14, 1.2696e-14,\n 5.1000e-14, 1.5979e-14, 3.7350e-15, 2.8482e-14, 4.2603e-15, 7.4726e-15,\n 2.7317e-14, 5.6744e-17, 3.5725e-14, 4.4326e-14, 1.5393e-14, 3.5009e-17,\n 1.2225e-18, 1.5274e-13, 7.3742e-16, 5.6130e-14, 1.9257e-14, 3.3116e-15,\n 2.5285e-14, 2.6454e-14, 6.1402e-14, 1.7316e-15], device='cuda:0')" }, "24": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.9492e-12, 1.5273e-12, 5.0091e-13, 3.2029e-13, 3.0459e-14, 2.0195e-12,\n 1.7057e-14, 1.8088e-12, 1.7723e-12, 1.6160e-15, 2.2023e-12, 1.5029e-13,\n 2.0045e-13, 1.4088e-14, 2.1873e-15, 1.2821e-14, 3.1055e-17, 2.2546e-14,\n 4.4215e-13, 5.2443e-13, 1.1868e-13, 2.1848e-13, 2.6248e-13, 2.7287e-15,\n 1.5724e-13, 1.6645e-13, 6.3730e-14, 4.1270e-13, 5.3523e-14, 2.2555e-15,\n 2.6922e-14, 1.5211e-13, 2.4411e-13, 3.1120e-13, 1.8832e-16, 1.2202e-16,\n 1.6371e-13, 2.2863e-15, 1.7053e-13, 1.3221e-13, 3.5394e-16, 8.3121e-13,\n 2.1162e-12, 5.5265e-16, 5.6588e-14, 1.9133e-15, 2.7180e-13, 1.6056e-15,\n 5.9521e-14, 5.5003e-13, 2.9631e-15, 4.3757e-14, 8.9727e-16, 2.5931e-13,\n 1.4716e-14, 6.0909e-15, 3.9894e-12, 3.4813e-12, 1.2075e-14, 4.4043e-16,\n 5.2040e-12, 3.3759e-14, 4.0289e-13, 1.4072e-12, 1.1195e-13, 2.4212e-12,\n 5.6678e-15, 2.1766e-14, 2.5625e-13, 4.2213e-14, 5.0970e-13, 9.5387e-14,\n 8.5544e-14, 3.2676e-14, 1.9283e-14, 5.4473e-14, 2.6574e-13, 1.2280e-16,\n 1.6824e-13, 5.0231e-14, 2.4999e-13, 5.7575e-13, 2.1584e-14, 9.7974e-16,\n 2.9084e-15, 1.5019e-13, 6.0472e-13, 3.4518e-15, 9.2287e-13, 5.6534e-14,\n 2.0644e-14, 1.5181e-15, 2.7439e-14, 5.1781e-13, 1.1420e-12, 6.0607e-13,\n 3.9361e-15, 2.9574e-15, 4.9807e-13, 3.8876e-14, 2.5547e-13, 2.9164e-15,\n 6.4819e-14, 6.2879e-16, 3.5148e-14, 1.2062e-15, 2.3589e-13, 7.5842e-17,\n 2.7739e-13, 7.1528e-14, 1.3263e-14, 1.2868e-14, 1.2326e-13, 1.3299e-15,\n 3.0648e-13, 2.8734e-13, 2.1683e-14, 2.8635e-12, 9.8166e-17, 1.0743e-12,\n 6.5522e-14, 9.7585e-15, 1.7385e-13, 1.5452e-15, 3.1643e-15, 2.9901e-15,\n 1.8529e-12, 1.1151e-13, 5.3895e-16, 8.4596e-15, 3.7296e-14, 5.9388e-14,\n 1.4165e-13, 2.1808e-13, 7.0508e-15, 1.7563e-13, 3.9206e-13, 3.5369e-14,\n 8.5045e-16, 1.8631e-14, 1.6314e-13, 3.8993e-13, 4.9632e-13, 9.7987e-12,\n 6.8808e-15, 1.8067e-16, 2.8487e-13, 3.7455e-14, 1.9843e-15, 2.5982e-14,\n 6.3712e-14, 1.6882e-14, 2.8983e-14, 1.8397e-13, 1.4866e-14, 1.4571e-14,\n 6.9745e-14, 1.5882e-12, 1.3638e-14, 5.2193e-12, 8.0778e-14, 8.3223e-15,\n 3.6149e-16, 1.1933e-12, 2.9919e-13, 5.9578e-13, 5.9960e-15, 1.1346e-13,\n 2.0822e-12, 4.7343e-15, 1.0917e-12, 2.7480e-13, 1.6164e-13, 2.8665e-14,\n 2.3504e-13, 1.6517e-12, 5.9263e-13, 2.7113e-13, 4.8782e-14, 7.3466e-13,\n 1.8308e-12, 1.7780e-13, 2.0673e-12, 2.0387e-16, 2.3351e-13, 1.2592e-16,\n 3.1422e-13, 1.5782e-14, 4.9943e-14, 5.5174e-15, 1.2473e-12, 8.0861e-16,\n 1.9918e-13, 4.5153e-15, 7.4751e-14, 9.3311e-14, 6.0406e-14, 1.8473e-12,\n 1.0092e-13, 4.5688e-15, 1.7467e-13, 7.9224e-15, 1.0067e-14, 3.8017e-14,\n 7.1362e-14, 3.4301e-13, 3.3171e-14, 3.1430e-14, 2.4627e-14, 4.6043e-15,\n 1.1524e-14, 3.5732e-13, 6.3795e-16, 1.2896e-12, 1.1116e-13, 5.9827e-14,\n 5.6228e-13, 1.2401e-13, 1.2088e-12, 3.4414e-13, 1.2318e-15, 4.8153e-13,\n 1.2605e-14, 6.0890e-16, 5.3977e-14, 1.5592e-14, 2.5675e-13, 1.0029e-12,\n 4.0472e-16, 8.8393e-15, 1.4255e-13, 9.2787e-14, 1.5511e-15, 1.2009e-14,\n 2.9019e-14, 5.8078e-13, 6.6735e-14, 4.3392e-14, 1.5665e-13, 4.0436e-14,\n 8.5674e-13, 7.8168e-14, 9.3158e-13, 3.2551e-13, 3.3073e-14, 1.5009e-14,\n 3.2653e-14, 3.2784e-13, 5.3492e-13, 6.9316e-15, 5.6415e-13, 5.7076e-15,\n 4.4492e-13, 3.4134e-14, 1.9078e-13, 2.3929e-13], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.4492e-16, 2.0693e-17, 2.3466e-17, 5.0778e-17, 5.9391e-18, 8.0194e-18,\n 2.0619e-17, 6.9010e-20, 4.2194e-18, 4.0478e-19, 2.3552e-16, 3.2123e-17,\n 3.6594e-17, 2.7233e-17, 9.3577e-18, 2.2450e-17, 8.3209e-17, 4.5986e-18,\n 1.4352e-17, 1.5490e-16, 3.4980e-21, 9.0763e-17, 3.5256e-17, 1.8357e-17,\n 3.7518e-18, 5.1004e-17, 1.9560e-17, 4.8683e-18, 3.8223e-18, 1.9162e-17,\n 1.4672e-16, 1.4730e-17, 2.4212e-18, 1.1104e-15, 6.3798e-17, 4.0890e-17,\n 8.0817e-18, 7.0850e-20, 7.1325e-17, 2.9021e-16, 3.1855e-17, 3.3545e-17,\n 4.0479e-18, 8.4415e-17, 1.7776e-16, 8.6894e-17, 1.1372e-16, 3.7375e-17,\n 5.8781e-18, 2.4320e-16, 6.2546e-19, 3.4548e-16, 1.9784e-16, 3.0804e-17,\n 1.5716e-18, 1.1092e-18, 1.4317e-16, 8.9688e-19, 5.5559e-17, 1.0013e-17,\n 2.1029e-17, 3.0948e-17, 2.4645e-16, 8.3337e-17, 9.7964e-19, 5.0259e-18,\n 5.7033e-17, 4.2063e-18, 6.1685e-17, 3.5873e-18, 1.2535e-18, 2.4845e-17,\n 4.6077e-17, 2.3713e-18, 1.7182e-17, 8.0284e-18, 1.0624e-17, 1.6425e-16,\n 1.5021e-18, 4.2682e-19, 1.2175e-17, 1.1819e-16, 1.1379e-19, 2.6588e-17,\n 4.3036e-18, 8.2221e-17, 6.1183e-17, 1.7459e-17, 2.4392e-17, 6.9999e-16,\n 7.2381e-18, 1.0605e-16, 1.9836e-16, 1.2706e-17, 6.9290e-16, 1.5432e-17,\n 1.2280e-17, 1.2141e-16, 5.2818e-17, 6.0904e-17, 2.2282e-16, 2.2330e-17,\n 2.8607e-18, 5.5225e-17, 3.2949e-19, 2.1075e-16, 1.2491e-17, 3.2471e-17,\n 6.1931e-18, 7.8683e-19, 2.0186e-16, 2.5201e-17, 5.7610e-16, 5.7437e-17,\n 2.6708e-18, 9.0062e-18, 1.6905e-17, 7.8706e-18, 1.4099e-17, 3.1821e-17,\n 8.1685e-17, 4.2329e-17, 3.1522e-18, 1.0045e-20, 3.9406e-19, 1.7165e-17,\n 1.2531e-16, 2.1933e-16, 6.6685e-18, 1.6695e-16, 4.9609e-20, 7.9442e-16,\n 2.7837e-17, 4.8358e-17, 5.2770e-17, 3.4694e-16, 6.8777e-17, 2.7954e-16,\n 6.2556e-17, 8.9116e-20, 3.5272e-18, 6.5966e-18, 1.9565e-17, 3.2412e-16,\n 3.8182e-17, 1.6615e-17, 1.3477e-17, 4.7908e-18, 3.5970e-16, 3.1665e-18,\n 7.8236e-18, 1.4452e-17, 2.7456e-17, 1.2878e-16, 2.1129e-17, 8.3709e-18,\n 1.3387e-17, 2.8482e-16, 6.0805e-18, 4.0476e-16, 7.0463e-17, 1.8750e-16,\n 7.1467e-18, 1.5026e-16, 2.0638e-16, 6.2564e-17, 9.2370e-16, 7.1842e-17,\n 1.8038e-16, 2.0109e-17, 1.2757e-18, 1.0387e-17, 9.4177e-17, 4.6893e-17,\n 5.2893e-17, 1.0111e-18, 2.9045e-18, 4.0035e-18, 6.0961e-18, 2.0842e-17,\n 3.2127e-18, 2.2813e-17, 1.0902e-17, 1.4824e-17, 1.4261e-16, 1.4538e-21,\n 1.2215e-17, 5.0234e-17, 5.2330e-18, 2.9079e-16, 1.4105e-17, 4.1638e-17,\n 7.9882e-17, 4.9758e-18, 1.4471e-19, 1.2094e-15, 6.6617e-18, 5.8383e-18,\n 8.9659e-17, 8.4296e-17, 3.9745e-19, 2.8251e-17, 2.1559e-16, 8.8620e-19,\n 1.1625e-16, 2.3699e-16, 3.3151e-17, 6.4300e-18, 4.9712e-21, 5.4927e-16,\n 7.4067e-18, 1.3264e-16, 8.6293e-18, 1.7812e-17, 1.6010e-18, 3.1549e-17,\n 4.0644e-17, 3.0337e-20, 5.0724e-18, 2.6130e-18, 1.7375e-17, 8.0015e-17,\n 2.3432e-17, 3.1609e-16, 6.0177e-17, 2.7687e-18, 9.9537e-20, 1.1078e-17,\n 6.2044e-18, 2.5868e-17, 1.4184e-18, 1.5599e-18, 2.6869e-17, 5.1042e-17,\n 1.2859e-16, 5.2146e-17, 5.8526e-18, 6.2509e-17, 1.3112e-17, 1.1205e-17,\n 6.9383e-17, 2.4066e-20, 3.0204e-16, 1.0819e-16, 2.5202e-17, 2.3836e-20,\n 4.3412e-19, 7.2057e-16, 7.7895e-19, 1.7391e-16, 3.6743e-17, 4.9617e-18,\n 5.7907e-17, 5.4845e-17, 1.3389e-16, 6.4943e-18], device='cuda:0')" }, "25": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.8311e-15, 1.5655e-15, 3.1165e-15, 5.3541e-18, 4.3697e-17, 5.0226e-15,\n 5.0450e-17, 1.9172e-15, 8.2854e-16, 2.7957e-18, 6.5539e-16, 2.1551e-15,\n 2.4598e-16, 9.4006e-19, 2.1985e-18, 2.0380e-17, 6.2390e-18, 1.5896e-18,\n 1.5196e-16, 1.6366e-15, 9.7039e-16, 2.5693e-17, 2.3168e-17, 3.1766e-17,\n 1.5435e-15, 5.5794e-17, 2.9940e-17, 5.4797e-17, 4.3237e-17, 3.5064e-18,\n 8.6389e-19, 1.6629e-16, 3.1499e-17, 4.5374e-16, 9.9050e-19, 1.5213e-18,\n 6.4206e-17, 1.4210e-20, 9.7968e-17, 8.2578e-17, 1.1651e-18, 3.9343e-15,\n 1.1171e-14, 4.4094e-18, 4.6006e-16, 3.0801e-19, 1.1694e-16, 5.9653e-20,\n 1.5677e-17, 1.3213e-16, 5.1736e-19, 4.1215e-18, 1.1358e-19, 2.7258e-16,\n 1.0840e-17, 3.6787e-18, 1.9216e-15, 4.3481e-15, 5.3077e-18, 1.0746e-18,\n 7.1892e-15, 7.6440e-18, 1.2168e-15, 6.8445e-16, 4.0249e-17, 3.5943e-15,\n 4.7151e-18, 2.8836e-17, 3.7681e-18, 1.2188e-17, 1.4925e-16, 1.0257e-17,\n 1.3502e-17, 3.1225e-17, 2.0570e-17, 6.7901e-18, 9.5948e-17, 3.0293e-18,\n 4.6492e-17, 8.7111e-18, 6.8311e-16, 9.2080e-17, 6.0364e-17, 1.5881e-17,\n 2.8765e-18, 1.9414e-15, 2.8944e-16, 1.8113e-18, 1.2626e-15, 6.9821e-18,\n 3.9976e-18, 1.6317e-18, 7.5327e-19, 2.5832e-16, 3.3019e-15, 1.5304e-16,\n 3.4097e-18, 2.4528e-18, 2.8188e-16, 2.5002e-17, 4.1315e-16, 3.6464e-18,\n 2.8607e-17, 3.0635e-18, 1.4061e-18, 2.8731e-19, 2.1495e-16, 8.1154e-19,\n 7.5388e-17, 8.0430e-18, 8.0074e-19, 1.6576e-16, 3.3240e-16, 1.2292e-18,\n 2.1824e-16, 8.7068e-16, 2.1489e-18, 6.6243e-15, 1.0697e-19, 1.6829e-15,\n 2.5186e-18, 1.0434e-18, 7.6844e-17, 2.5374e-20, 2.9362e-18, 1.3804e-19,\n 2.3343e-15, 3.2361e-17, 1.9887e-21, 1.0384e-18, 1.7506e-18, 4.9039e-18,\n 6.5116e-17, 3.2938e-16, 3.3142e-18, 4.0529e-17, 1.4457e-15, 6.5945e-18,\n 9.1846e-19, 5.7067e-17, 1.5336e-17, 8.2408e-17, 8.0231e-17, 2.3878e-14,\n 1.3619e-17, 2.4588e-20, 1.8655e-16, 4.9289e-18, 6.8562e-19, 1.8448e-17,\n 1.4180e-16, 2.1320e-18, 2.5911e-19, 7.1147e-17, 3.3348e-18, 4.3799e-17,\n 1.4036e-17, 1.4523e-15, 1.9768e-18, 4.9598e-15, 1.9262e-17, 3.0620e-19,\n 7.2789e-18, 3.5601e-15, 6.1476e-16, 8.5942e-18, 1.3739e-17, 1.6064e-17,\n 5.9389e-15, 1.0654e-19, 1.7025e-15, 2.7999e-17, 1.5045e-16, 8.8731e-18,\n 1.0941e-16, 1.6540e-15, 2.5505e-16, 6.4362e-17, 1.0992e-17, 5.3656e-16,\n 1.2832e-15, 7.2209e-18, 7.1038e-16, 3.8886e-19, 1.4141e-16, 2.3354e-18,\n 2.3362e-16, 2.0581e-17, 6.5972e-17, 9.7664e-18, 3.5856e-16, 6.6542e-18,\n 1.8797e-17, 1.9279e-18, 4.0897e-18, 9.6629e-18, 3.4414e-17, 5.3064e-15,\n 1.6477e-17, 4.4193e-18, 2.8653e-17, 4.5128e-19, 1.9488e-18, 9.4957e-17,\n 1.7504e-16, 7.3422e-17, 2.7989e-17, 1.4028e-18, 4.9309e-18, 1.0573e-17,\n 9.3064e-18, 4.3041e-16, 7.8412e-19, 4.3535e-15, 5.0394e-18, 5.6886e-18,\n 1.8370e-16, 1.6372e-17, 2.3855e-16, 3.0700e-17, 8.9042e-18, 1.0233e-15,\n 8.6754e-18, 1.5841e-18, 1.7847e-16, 6.7932e-19, 4.7725e-18, 1.8221e-15,\n 2.2780e-19, 6.9371e-19, 1.7238e-17, 7.4576e-17, 1.5979e-19, 3.3618e-18,\n 8.7754e-19, 1.9385e-15, 3.7403e-17, 1.3392e-18, 1.9525e-15, 3.2533e-18,\n 1.2504e-15, 2.2885e-17, 4.6180e-16, 4.5936e-16, 1.3970e-17, 5.9419e-19,\n 3.2494e-19, 5.0461e-16, 5.9599e-16, 5.7377e-17, 1.5127e-16, 2.8066e-18,\n 7.6165e-16, 9.5333e-17, 2.2037e-17, 9.9377e-17], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.5550e-16, 3.1344e-17, 4.2293e-17, 5.7361e-17, 8.8991e-18, 1.2740e-17,\n 2.5794e-17, 8.1007e-20, 7.6321e-18, 7.0551e-19, 2.1068e-16, 4.7389e-17,\n 5.2939e-17, 5.2891e-17, 1.6930e-17, 4.1230e-17, 1.5063e-16, 6.4025e-18,\n 2.0937e-17, 3.0113e-16, 5.4926e-20, 1.2057e-16, 5.7412e-17, 3.1118e-17,\n 4.7238e-18, 9.0583e-17, 4.1567e-17, 8.5515e-18, 6.8418e-18, 2.2977e-17,\n 1.7369e-16, 3.5521e-17, 6.0632e-18, 9.8864e-16, 1.2119e-16, 8.4485e-17,\n 6.2863e-18, 5.3574e-20, 1.1627e-16, 4.5926e-16, 5.7537e-17, 4.8464e-17,\n 1.0092e-17, 5.6121e-17, 1.9474e-16, 1.5449e-16, 1.1484e-16, 7.0592e-17,\n 9.5432e-18, 2.0446e-16, 6.3327e-19, 4.6836e-16, 2.5827e-16, 5.7690e-17,\n 3.3181e-18, 2.7036e-18, 1.9452e-16, 2.4816e-18, 8.5562e-17, 2.2380e-17,\n 2.4147e-17, 5.0417e-17, 2.4568e-16, 1.3182e-16, 1.7002e-18, 9.5362e-18,\n 9.5064e-17, 8.8715e-18, 1.2727e-16, 4.4308e-18, 1.0357e-18, 5.8619e-17,\n 7.2662e-17, 2.5039e-18, 2.7420e-17, 9.0543e-18, 1.9512e-17, 2.6421e-16,\n 2.4638e-18, 3.7832e-19, 2.7115e-17, 2.0122e-16, 3.0372e-19, 2.2512e-17,\n 5.6451e-18, 1.4390e-16, 6.8367e-17, 3.4491e-17, 4.1928e-17, 7.9226e-16,\n 1.3045e-17, 1.9263e-16, 1.8053e-16, 2.6654e-17, 4.6097e-16, 2.4777e-17,\n 3.0059e-17, 2.1357e-16, 9.0505e-17, 1.1070e-16, 2.2640e-16, 1.9900e-17,\n 5.4220e-18, 6.1783e-17, 9.5472e-19, 3.8751e-16, 1.5621e-17, 6.2609e-17,\n 1.1335e-17, 1.4041e-18, 3.1849e-16, 3.4107e-17, 3.9314e-16, 9.6877e-17,\n 8.1274e-18, 1.6630e-17, 3.1422e-17, 1.5690e-17, 1.9093e-17, 4.9136e-17,\n 8.4115e-17, 5.0677e-17, 6.3440e-18, 6.5776e-21, 4.1701e-19, 3.1286e-17,\n 2.4279e-16, 1.6802e-16, 1.5110e-17, 1.5111e-16, 6.5841e-20, 9.9422e-16,\n 5.0555e-17, 8.9074e-17, 1.1342e-16, 3.1997e-16, 1.3496e-16, 3.2995e-16,\n 1.1307e-16, 1.9394e-19, 4.4737e-18, 1.2025e-17, 3.5891e-17, 3.4938e-16,\n 7.6820e-17, 2.6484e-17, 2.1821e-17, 8.8066e-18, 3.4985e-16, 3.5805e-18,\n 1.2724e-17, 2.8557e-17, 4.5614e-17, 1.1104e-16, 3.6850e-17, 1.2188e-17,\n 2.4266e-17, 3.2470e-16, 8.0681e-18, 5.3800e-16, 1.1147e-16, 1.4685e-16,\n 1.2056e-17, 1.9824e-16, 1.5753e-16, 8.7857e-17, 9.4043e-16, 1.2870e-16,\n 1.7731e-16, 2.6356e-17, 3.2903e-18, 2.0585e-17, 1.7308e-16, 7.2154e-17,\n 9.3148e-17, 2.3734e-18, 5.6122e-18, 8.1931e-18, 7.8913e-18, 5.5229e-17,\n 4.7125e-18, 2.4219e-17, 1.7643e-17, 2.6713e-17, 2.7821e-16, 2.4488e-20,\n 2.1905e-17, 6.1204e-17, 1.0414e-17, 4.4482e-16, 3.7905e-17, 4.6886e-17,\n 8.2697e-17, 7.9059e-18, 3.8117e-19, 1.2286e-15, 8.6839e-18, 1.0002e-17,\n 1.0475e-16, 2.0128e-16, 9.6162e-19, 7.3967e-17, 1.9855e-16, 1.6750e-18,\n 1.5706e-16, 4.4323e-16, 6.5443e-17, 1.0537e-17, 8.8176e-21, 6.2198e-16,\n 6.9036e-18, 2.4851e-16, 1.2845e-17, 3.9629e-17, 2.1629e-18, 5.5029e-17,\n 7.2362e-17, 2.9784e-19, 1.0053e-17, 4.8445e-18, 1.4524e-17, 1.2126e-16,\n 2.7425e-17, 4.1177e-16, 8.1964e-17, 5.6893e-18, 7.8961e-20, 9.7752e-18,\n 8.2378e-18, 4.2190e-17, 3.0317e-18, 1.6802e-18, 7.0499e-17, 6.4688e-17,\n 2.1912e-16, 7.5320e-17, 8.6773e-18, 1.2045e-16, 2.4788e-17, 2.1244e-17,\n 1.2115e-16, 5.2807e-20, 1.6816e-16, 1.4743e-16, 4.5038e-17, 1.7934e-19,\n 8.9786e-19, 5.1973e-16, 1.2521e-18, 2.2779e-16, 5.8832e-17, 7.6389e-18,\n 7.8197e-17, 7.9025e-17, 1.9968e-16, 1.1729e-17], device='cuda:0')" }, "26": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.4998e-15, 1.9059e-15, 1.0664e-15, 3.5294e-16, 8.5791e-17, 2.6048e-15,\n 1.6581e-17, 2.4687e-15, 2.2219e-15, 8.1525e-19, 2.7567e-15, 6.4112e-16,\n 3.7078e-16, 1.9837e-17, 8.8381e-19, 7.6047e-18, 2.8145e-19, 3.0912e-17,\n 5.3294e-16, 1.0823e-15, 4.2432e-16, 2.7831e-16, 2.9592e-16, 1.0986e-17,\n 6.1457e-16, 2.0289e-16, 1.6041e-16, 4.5559e-16, 6.7176e-17, 1.2604e-18,\n 7.9511e-18, 3.2540e-16, 3.1123e-16, 7.1221e-16, 1.0710e-19, 1.0330e-18,\n 2.1671e-16, 5.4484e-19, 2.2538e-16, 1.5738e-16, 6.1451e-18, 1.1846e-15,\n 3.2102e-15, 1.5804e-18, 3.1863e-16, 3.0757e-18, 2.3504e-16, 2.3941e-18,\n 7.3777e-17, 6.9220e-16, 4.9564e-18, 5.8983e-17, 1.3547e-18, 2.0348e-16,\n 2.0050e-17, 1.4213e-18, 5.0161e-15, 4.3999e-15, 1.7677e-18, 7.7926e-18,\n 6.4780e-15, 5.0072e-17, 4.9996e-16, 1.8180e-15, 1.3964e-16, 3.1031e-15,\n 1.8418e-18, 3.6909e-17, 3.2536e-16, 5.7056e-17, 6.3861e-16, 9.5571e-17,\n 1.3808e-16, 1.2382e-16, 2.5473e-17, 1.6958e-17, 3.3664e-16, 1.4574e-17,\n 2.6675e-16, 6.6199e-17, 3.6292e-16, 7.2128e-16, 2.0738e-17, 5.6123e-18,\n 2.0200e-17, 4.4352e-16, 7.5991e-16, 4.2122e-18, 1.2467e-15, 8.3487e-17,\n 3.0019e-17, 1.3318e-17, 1.3330e-17, 6.3884e-16, 1.4172e-15, 8.2414e-16,\n 3.6737e-17, 3.1624e-19, 7.6240e-16, 1.3265e-16, 3.9913e-16, 6.6274e-19,\n 1.0968e-16, 3.8654e-18, 1.9203e-17, 1.7482e-17, 3.0784e-16, 3.7091e-20,\n 4.1212e-16, 9.2444e-17, 1.8369e-17, 1.4777e-16, 2.3213e-16, 1.0351e-18,\n 4.2293e-16, 4.1649e-16, 3.0973e-17, 3.8312e-15, 5.1727e-19, 1.2993e-15,\n 5.0101e-17, 1.1267e-17, 2.1584e-16, 1.0974e-18, 2.8456e-17, 2.4072e-20,\n 2.2379e-15, 9.3115e-17, 1.4121e-20, 3.8618e-18, 5.0761e-17, 3.0514e-17,\n 1.1211e-16, 3.8298e-16, 4.8841e-18, 2.2763e-16, 7.2971e-16, 4.1386e-17,\n 8.9699e-19, 2.2269e-17, 2.1105e-16, 4.0350e-16, 7.5981e-16, 1.2724e-14,\n 2.5919e-18, 2.3648e-19, 4.0086e-16, 5.0007e-17, 8.1410e-20, 2.1990e-17,\n 2.5786e-16, 2.6193e-17, 1.9798e-17, 2.3238e-16, 1.9715e-17, 8.5410e-17,\n 1.7533e-17, 1.9704e-15, 1.5794e-17, 6.5674e-15, 3.4179e-17, 1.2330e-17,\n 1.1695e-18, 1.7142e-15, 3.5009e-16, 7.5824e-16, 8.8858e-18, 1.4495e-16,\n 2.6279e-15, 6.8638e-18, 1.3203e-15, 3.1011e-16, 3.8402e-16, 3.6644e-17,\n 3.2700e-16, 2.1151e-15, 6.5215e-16, 3.6477e-16, 3.9405e-17, 9.1274e-16,\n 2.2756e-15, 1.9507e-16, 2.6034e-15, 1.1593e-17, 2.9154e-16, 8.9385e-19,\n 4.1444e-16, 7.1656e-18, 1.6083e-16, 3.9409e-18, 1.5663e-15, 7.0355e-19,\n 2.5305e-16, 5.8775e-18, 5.6115e-17, 1.2187e-16, 4.9031e-17, 2.3170e-15,\n 1.3183e-16, 1.2066e-18, 1.4701e-16, 1.1665e-17, 5.7872e-18, 1.6128e-16,\n 5.8863e-17, 4.3284e-16, 4.9418e-17, 4.1660e-17, 5.7515e-17, 2.2381e-17,\n 3.4338e-18, 4.1088e-16, 9.4044e-19, 1.8794e-15, 1.4360e-16, 5.4685e-17,\n 7.1652e-16, 1.5637e-16, 1.5180e-15, 4.4217e-16, 3.3308e-18, 8.3253e-16,\n 7.4315e-18, 6.0611e-19, 2.3358e-16, 1.4793e-17, 3.3033e-16, 1.4483e-15,\n 1.2194e-19, 1.9221e-18, 2.1433e-16, 1.1477e-16, 2.6905e-19, 5.7903e-17,\n 6.6376e-18, 1.0843e-15, 1.2672e-16, 5.7111e-17, 6.1184e-16, 1.0882e-17,\n 1.0732e-15, 1.0749e-16, 1.0884e-15, 5.9718e-16, 9.7446e-17, 2.0657e-17,\n 4.4281e-17, 4.1560e-16, 7.2387e-16, 9.3609e-17, 7.2314e-16, 9.0727e-19,\n 5.2030e-16, 3.0986e-17, 2.4097e-16, 5.3366e-16], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.7142e-17, 2.3695e-17, 0.0000e+00, ..., 5.0155e-17, 8.0875e-17,\n 9.8792e-18],\n [1.5848e-18, 2.2032e-18, 0.0000e+00, ..., 6.1574e-20, 1.9849e-18,\n 6.3388e-20],\n [1.2809e-17, 1.0010e-17, 0.0000e+00, ..., 1.0303e-17, 1.1128e-17,\n 7.2378e-18],\n ...,\n [6.3784e-18, 5.2418e-18, 0.0000e+00, ..., 3.6977e-18, 3.6114e-17,\n 1.0423e-18],\n [3.7431e-17, 1.1584e-17, 0.0000e+00, ..., 9.9827e-18, 8.1814e-17,\n 1.8119e-17],\n [7.5696e-18, 1.3308e-18, 0.0000e+00, ..., 1.8361e-18, 1.5772e-17,\n 1.6582e-19]], device='cuda:0')" }, "27": { - "step": "tensor(15024.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.7287e-17, 1.0874e-19, 2.1969e-18, ..., 1.6966e-19, 6.1186e-19,\n 3.0017e-20],\n [3.7614e-15, 3.7221e-15, 2.5889e-18, ..., 2.6236e-16, 5.0416e-16,\n 1.0550e-16],\n [3.0143e-16, 3.5346e-16, 1.4374e-19, ..., 2.7881e-17, 4.1356e-17,\n 1.7364e-17],\n ...,\n [3.1177e-15, 3.3216e-15, 2.2773e-18, ..., 1.8137e-16, 4.2989e-16,\n 7.4442e-17],\n [4.3335e-17, 5.9721e-17, 8.5525e-19, ..., 4.8615e-18, 3.8050e-18,\n 1.2151e-18],\n [5.4863e-15, 5.6843e-15, 1.5604e-18, ..., 4.2262e-16, 7.0299e-16,\n 1.7015e-16]], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.9051e-14, 1.3029e-15, 1.0341e-14, 1.3437e-14, 4.0918e-15, 1.0314e-15,\n 1.3573e-14, 3.3032e-16, 6.0912e-15, 9.2963e-16, 3.0882e-14, 1.0688e-13,\n 1.8196e-14, 2.1398e-14, 2.2326e-14, 9.2414e-14, 1.0357e-14, 1.0268e-14,\n 3.4658e-14, 4.0295e-14, 1.2922e-16, 4.3294e-15, 8.7319e-15, 1.0826e-14,\n 1.0027e-14, 1.8588e-14, 1.4777e-14, 2.9025e-15, 4.2413e-16, 4.8309e-15,\n 2.5672e-14, 1.6961e-14, 2.6596e-15, 1.3045e-13, 4.7251e-14, 2.8960e-14,\n 8.6151e-16, 1.6328e-17, 1.2148e-14, 3.9228e-13, 5.8520e-14, 8.2273e-15,\n 2.5790e-15, 1.6141e-15, 6.7658e-14, 1.1915e-13, 1.8559e-14, 4.1115e-14,\n 5.1220e-15, 3.2914e-14, 6.3473e-16, 7.0074e-14, 1.7866e-14, 1.6096e-14,\n 2.5580e-15, 2.2313e-16, 3.0513e-14, 1.1323e-15, 2.0280e-14, 1.7747e-15,\n 1.0422e-14, 1.9951e-14, 5.0295e-14, 2.6805e-14, 7.5601e-16, 1.8706e-15,\n 2.0110e-14, 1.3092e-15, 1.2752e-14, 2.4230e-15, 4.4235e-17, 1.2358e-14,\n 5.2232e-14, 3.8362e-15, 1.0575e-14, 7.0691e-17, 6.8674e-16, 4.2663e-14,\n 2.7531e-15, 9.0911e-19, 2.5252e-14, 3.0390e-14, 4.3519e-17, 2.5797e-15,\n 1.8005e-14, 7.6905e-14, 6.1820e-15, 2.6554e-14, 5.2601e-15, 4.3863e-14,\n 3.1063e-14, 1.9106e-14, 4.8563e-14, 8.8609e-15, 1.5647e-14, 2.3983e-15,\n 8.1579e-15, 5.5851e-14, 1.3552e-14, 1.1279e-13, 9.9918e-14, 2.4448e-15,\n 4.3456e-17, 1.3420e-14, 7.5418e-16, 8.8484e-14, 7.0917e-15, 6.4765e-15,\n 1.9298e-14, 1.2034e-16, 9.5657e-15, 6.2421e-17, 6.2343e-14, 6.1069e-14,\n 4.8140e-15, 5.3386e-14, 4.5586e-15, 8.7069e-15, 1.2900e-14, 1.3250e-14,\n 4.6176e-15, 1.0506e-15, 1.8945e-15, 2.5132e-18, 2.4619e-16, 1.1858e-14,\n 1.0077e-13, 1.6644e-14, 4.2087e-14, 1.9137e-14, 1.0568e-16, 1.2144e-13,\n 1.9579e-14, 1.8097e-14, 4.4194e-14, 2.5786e-14, 2.8737e-14, 3.8561e-14,\n 2.6193e-14, 1.0092e-16, 7.6439e-16, 1.4084e-15, 1.0370e-14, 1.3196e-13,\n 2.5213e-14, 1.5402e-15, 2.3778e-14, 2.8724e-15, 2.4277e-14, 6.0054e-16,\n 1.1699e-15, 4.1138e-14, 3.0012e-14, 3.7557e-14, 7.9751e-15, 1.3445e-15,\n 2.3914e-14, 7.0796e-14, 1.2420e-14, 1.5650e-14, 1.9335e-14, 2.2199e-14,\n 1.2052e-14, 1.6092e-14, 1.5066e-14, 1.4181e-14, 1.0180e-13, 7.8405e-14,\n 1.1445e-14, 4.3182e-14, 4.2992e-15, 3.1024e-15, 6.4070e-14, 6.0958e-15,\n 4.5756e-14, 3.4033e-15, 1.3095e-15, 4.1315e-15, 4.4183e-15, 4.9129e-15,\n 5.7909e-15, 4.9844e-15, 1.8156e-15, 1.2934e-14, 1.9903e-13, 3.0402e-17,\n 9.8670e-15, 8.2739e-15, 5.8411e-15, 5.9695e-14, 1.7549e-14, 2.1521e-15,\n 8.8749e-15, 1.2236e-14, 9.2938e-17, 1.0842e-13, 5.7047e-15, 8.6878e-16,\n 2.7096e-14, 1.2023e-13, 1.0068e-19, 1.1757e-14, 5.8357e-14, 1.2987e-14,\n 1.0845e-14, 8.4668e-14, 2.0940e-14, 4.5114e-15, 4.4969e-18, 1.0154e-13,\n 2.0627e-15, 1.0175e-13, 6.2513e-17, 2.2562e-14, 3.2382e-16, 2.7109e-14,\n 3.8158e-14, 6.4869e-19, 3.2257e-15, 4.4854e-15, 3.8181e-15, 1.2417e-13,\n 6.5646e-15, 2.6046e-14, 5.8641e-15, 2.4129e-16, 1.3433e-16, 4.7855e-16,\n 4.3874e-15, 8.7421e-15, 7.3291e-16, 1.0754e-16, 4.5742e-14, 3.2649e-15,\n 2.4000e-15, 1.6736e-14, 1.4236e-14, 2.0947e-14, 8.6634e-15, 6.7221e-15,\n 2.2716e-15, 5.1459e-18, 1.7247e-14, 1.1184e-14, 5.0035e-15, 1.5983e-16,\n 1.0326e-16, 8.5440e-14, 3.9699e-15, 9.8507e-14, 3.1315e-14, 3.4182e-15,\n 4.7018e-14, 1.3289e-14, 2.7760e-14, 5.2761e-15], device='cuda:0')" }, "28": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([7.2931e-16, 2.0405e-12, 2.1057e-13, 4.3425e-13, 8.3055e-14, 6.8889e-15,\n 2.1245e-13, 1.0523e-12, 6.1506e-13, 7.6527e-15, 2.4301e-14, 1.4604e-14,\n 3.2910e-14, 3.1632e-14, 6.2628e-15, 3.0811e-14, 1.0928e-15, 1.1743e-13,\n 9.2971e-14, 7.5068e-13, 1.2967e-14, 3.3104e-13, 2.0522e-12, 3.8217e-13,\n 5.5597e-13, 3.9516e-15, 1.1819e-14, 2.2083e-12, 4.9943e-17, 3.6112e-15,\n 1.1601e-14, 4.2681e-13, 2.4462e-13, 1.7532e-13, 3.7984e-14, 1.0126e-13,\n 6.9205e-14, 2.4983e-14, 1.6789e-13, 3.4281e-14, 3.1470e-14, 1.5960e-13,\n 1.4656e-12, 1.3956e-13, 2.4903e-14, 1.3039e-13, 4.9646e-14, 1.0904e-13,\n 2.2787e-15, 2.4953e-13, 1.5082e-13, 3.2214e-12, 2.5220e-14, 5.9036e-14,\n 9.6614e-15, 8.5136e-13, 7.5302e-12, 1.6353e-12, 1.2367e-13, 1.7527e-13,\n 1.4234e-12, 2.4858e-14, 2.8189e-13, 3.0134e-12, 6.8597e-14, 1.1014e-12,\n 7.4657e-14, 7.1010e-14, 2.6861e-12, 5.4884e-13, 3.8630e-16, 3.4499e-14,\n 2.3481e-14, 3.3344e-14, 2.6056e-16, 1.0828e-14, 1.0484e-15, 4.1288e-15,\n 2.1766e-13, 1.8491e-14, 1.9684e-15, 5.6311e-13, 8.4865e-14, 4.1495e-13,\n 8.6598e-16, 6.6872e-14, 4.6583e-13, 7.6836e-15, 1.6670e-12, 9.9768e-15,\n 3.3400e-13, 1.3757e-13, 4.7058e-13, 6.9132e-14, 3.1393e-13, 3.4809e-12,\n 5.3102e-15, 2.3972e-15, 2.0791e-13, 8.5005e-14, 2.4780e-15, 3.5940e-14,\n 1.6147e-14, 1.9044e-13, 4.2053e-15, 5.9094e-14, 2.6458e-13, 1.0366e-16,\n 4.0272e-14, 2.0074e-15, 1.5870e-16, 2.8389e-14, 2.2686e-16, 1.6247e-14,\n 2.7400e-13, 4.7225e-13, 2.9205e-13, 2.2303e-12, 2.2147e-15, 1.3897e-14,\n 9.2194e-13, 1.8366e-15, 3.5575e-15, 2.9001e-16, 3.9203e-15, 3.3425e-15,\n 2.3781e-13, 3.6977e-14, 4.3456e-14, 1.7977e-13, 8.6883e-14, 1.4132e-14,\n 3.5375e-14, 1.0983e-14, 2.3639e-14, 6.7813e-17, 6.0306e-14, 5.0377e-14,\n 1.9196e-13, 3.5268e-12, 2.4492e-13, 1.4117e-15, 1.2234e-13, 6.0030e-12,\n 1.5482e-13, 2.6558e-14, 1.6402e-14, 2.1318e-14, 1.6996e-14, 4.2805e-14,\n 8.3387e-15, 5.8139e-15, 4.5691e-15, 4.4317e-13, 3.7500e-15, 1.8376e-15,\n 1.8839e-15, 8.5251e-13, 2.2469e-14, 1.4450e-13, 1.3117e-13, 1.1489e-16,\n 3.6141e-15, 1.3656e-12, 8.4558e-15, 8.8431e-13, 8.1294e-16, 1.7178e-13,\n 2.5494e-12, 4.0779e-14, 1.3093e-12, 2.8773e-14, 6.1543e-14, 2.2750e-16,\n 3.1303e-15, 3.3521e-14, 1.3284e-13, 1.7459e-16, 1.0948e-12, 2.3344e-14,\n 5.2716e-13, 3.7465e-13, 1.8423e-14, 1.2747e-14, 1.6509e-15, 6.7901e-14,\n 1.8926e-14, 7.7776e-15, 2.7713e-14, 1.2253e-13, 6.5397e-13, 3.8879e-17,\n 2.1636e-13, 3.5928e-13, 2.8315e-13, 8.8666e-12, 2.8392e-15, 1.5439e-13,\n 2.7570e-14, 2.2810e-13, 7.8401e-14, 2.5566e-13, 8.0573e-14, 4.2953e-14,\n 8.7667e-14, 7.8956e-14, 4.5773e-16, 1.6826e-13, 4.0025e-16, 1.9251e-12,\n 1.4644e-14, 3.6014e-15, 2.9648e-15, 1.9403e-12, 6.0338e-13, 1.4426e-13,\n 1.0672e-13, 1.6672e-12, 5.4848e-12, 1.4986e-15, 4.6063e-15, 4.9422e-13,\n 2.2037e-15, 3.2476e-13, 2.5169e-14, 6.8450e-15, 1.5436e-12, 1.2453e-12,\n 1.3791e-15, 5.2254e-15, 1.1539e-12, 6.4021e-14, 9.0518e-14, 4.8376e-14,\n 5.9978e-14, 1.0736e-15, 3.6560e-14, 1.2041e-15, 1.8361e-13, 3.0307e-14,\n 8.8918e-16, 1.5282e-13, 1.2741e-12, 6.2973e-14, 1.9620e-13, 5.0544e-14,\n 1.4701e-12, 1.4105e-14, 5.9116e-13, 1.0322e-13, 1.7840e-14, 7.8568e-17,\n 4.7163e-14, 1.4040e-12, 2.6538e-14, 2.9116e-12], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.1981e-17, 6.4484e-18, 1.8934e-17, 4.3906e-17, 7.8320e-18, 4.8395e-18,\n 2.7940e-17, 1.8514e-18, 9.4715e-18, 1.1525e-18, 5.7921e-17, 2.8255e-16,\n 3.9720e-17, 5.3527e-17, 8.5857e-17, 3.8314e-16, 3.1250e-17, 2.5244e-17,\n 1.1685e-16, 7.9900e-17, 1.4131e-20, 1.1465e-17, 3.2499e-17, 2.2034e-17,\n 3.5754e-17, 4.4834e-17, 3.0761e-17, 9.1566e-18, 1.0898e-18, 8.4568e-18,\n 6.9493e-17, 4.0358e-17, 6.2593e-18, 3.2734e-16, 1.2360e-16, 8.7425e-17,\n 1.8456e-18, 6.9729e-21, 2.9474e-17, 2.3981e-15, 1.3131e-16, 2.6319e-17,\n 8.6125e-18, 4.8164e-18, 2.2903e-16, 5.2575e-16, 5.7298e-17, 1.8728e-16,\n 5.5626e-18, 6.8266e-17, 6.3245e-19, 1.7698e-16, 3.4293e-17, 3.5566e-17,\n 3.6202e-18, 8.4567e-19, 9.7785e-17, 9.6248e-19, 8.2850e-17, 6.1641e-18,\n 2.4140e-17, 1.4325e-16, 2.3981e-16, 9.3880e-17, 3.5372e-19, 2.1643e-18,\n 5.4449e-17, 1.1559e-18, 2.0878e-17, 7.8616e-18, 1.7336e-18, 3.7273e-17,\n 1.2145e-16, 9.0520e-18, 2.5352e-17, 7.4189e-19, 2.4682e-18, 1.2236e-16,\n 3.9045e-18, 6.9530e-20, 9.3839e-17, 6.5394e-17, 7.5956e-20, 1.2532e-17,\n 7.1347e-17, 2.2378e-16, 1.7901e-17, 1.1332e-16, 1.2873e-17, 1.0051e-16,\n 1.7603e-16, 4.7854e-17, 1.1998e-16, 1.2976e-17, 3.3846e-17, 2.3434e-18,\n 1.6469e-17, 9.9203e-17, 3.1893e-17, 3.1564e-16, 6.5419e-16, 1.1155e-17,\n 2.4952e-19, 3.0593e-17, 3.9973e-19, 1.9568e-16, 2.1624e-17, 1.7642e-17,\n 5.7744e-17, 4.9897e-22, 2.5145e-17, 1.2620e-18, 1.9076e-16, 1.2036e-16,\n 6.7242e-18, 1.4056e-16, 1.5251e-17, 1.6702e-17, 3.2358e-17, 3.4858e-17,\n 6.0578e-18, 4.3122e-18, 3.5072e-18, 1.4871e-20, 9.8046e-19, 2.2357e-17,\n 2.5664e-16, 5.1689e-17, 2.1418e-16, 4.3380e-17, 2.2802e-20, 3.1712e-16,\n 9.0859e-17, 3.5170e-17, 1.3777e-16, 4.6803e-17, 4.9180e-17, 6.5507e-17,\n 5.6445e-17, 5.5470e-21, 3.8107e-18, 8.5773e-18, 1.2335e-17, 3.7687e-16,\n 6.4731e-17, 4.5544e-18, 1.0785e-16, 7.5561e-18, 4.2938e-17, 1.2163e-19,\n 3.2020e-18, 1.0689e-16, 1.8506e-16, 1.5219e-16, 8.0319e-18, 1.1537e-17,\n 1.1326e-16, 1.8876e-16, 2.7305e-17, 3.3655e-17, 2.6329e-17, 3.7113e-17,\n 5.9104e-17, 2.9670e-17, 4.6870e-17, 3.3188e-17, 1.8439e-16, 2.5860e-16,\n 2.3547e-17, 1.0169e-16, 7.9252e-18, 4.5153e-18, 1.4060e-16, 1.1993e-17,\n 1.3381e-16, 2.0483e-17, 2.8390e-18, 9.5993e-18, 8.6173e-18, 1.7265e-17,\n 7.8013e-18, 1.0871e-17, 2.0469e-18, 2.4038e-17, 6.1201e-16, 2.0690e-19,\n 1.5672e-17, 1.5039e-17, 6.5057e-18, 1.3095e-16, 2.5924e-17, 3.5774e-18,\n 3.1186e-17, 5.6283e-17, 3.8896e-19, 2.4078e-16, 1.9291e-17, 1.3342e-18,\n 1.2578e-16, 2.9786e-16, 3.9761e-20, 2.6784e-17, 2.1284e-16, 4.2738e-17,\n 4.0105e-17, 1.6837e-16, 4.0758e-17, 1.1715e-17, 1.6438e-20, 2.1710e-16,\n 3.2509e-18, 2.7609e-16, 7.2026e-19, 6.0351e-17, 3.3889e-19, 6.1167e-17,\n 5.3771e-17, 2.9186e-19, 1.0814e-17, 7.8949e-18, 9.6546e-18, 5.6239e-16,\n 2.3806e-17, 6.8167e-17, 1.6289e-17, 2.6128e-18, 3.7122e-21, 2.7448e-18,\n 6.0413e-18, 1.9952e-17, 2.8224e-18, 2.0492e-21, 1.7045e-16, 1.1027e-17,\n 8.7210e-18, 6.0468e-17, 5.1737e-17, 4.3308e-17, 3.9045e-17, 1.5066e-17,\n 1.1725e-17, 1.0505e-19, 5.7891e-17, 1.8723e-17, 9.1258e-18, 1.6065e-19,\n 1.8607e-19, 2.8405e-16, 9.0906e-18, 3.2163e-16, 5.3308e-17, 5.8932e-18,\n 1.9134e-16, 2.4096e-17, 5.0076e-17, 3.4107e-17], device='cuda:0')" }, "29": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.0787e-17, 3.4276e-15, 8.2893e-17, 9.2334e-18, 1.3908e-17, 3.5783e-17,\n 4.8422e-18, 3.9145e-16, 1.0773e-16, 2.2215e-17, 8.4544e-17, 4.2323e-18,\n 2.0846e-17, 1.7827e-16, 2.2656e-17, 5.3455e-17, 5.0301e-19, 2.0031e-16,\n 3.9775e-18, 1.2066e-15, 2.3957e-17, 8.4969e-17, 9.4229e-15, 1.5699e-16,\n 1.6042e-15, 6.5836e-19, 2.8926e-18, 1.4479e-15, 1.7685e-18, 9.5964e-20,\n 3.1368e-18, 3.3625e-16, 5.4259e-17, 2.9931e-16, 4.2776e-18, 9.2822e-17,\n 6.2645e-18, 4.3409e-18, 1.7069e-16, 2.9910e-18, 6.6052e-17, 3.5751e-17,\n 1.9408e-15, 1.6413e-16, 5.1793e-17, 3.9717e-16, 2.5944e-18, 4.4633e-17,\n 3.6968e-18, 3.0016e-16, 7.9492e-17, 9.5524e-15, 1.4708e-17, 4.3049e-18,\n 2.2288e-17, 6.8027e-16, 1.6595e-14, 7.3661e-16, 4.6326e-17, 1.0108e-16,\n 1.9641e-16, 4.7138e-17, 7.6138e-17, 1.5008e-15, 3.4603e-18, 5.2940e-16,\n 6.7716e-17, 4.1873e-16, 1.0058e-14, 8.7460e-16, 4.5101e-18, 7.0350e-17,\n 9.0493e-18, 8.6474e-17, 3.3545e-17, 1.4830e-18, 1.3889e-17, 4.8920e-19,\n 5.7734e-17, 1.5598e-17, 5.0881e-20, 3.5402e-16, 4.1934e-18, 3.6629e-15,\n 1.1854e-18, 8.0583e-16, 6.5344e-17, 8.4907e-18, 3.4225e-15, 2.9605e-17,\n 8.7038e-16, 1.0507e-17, 1.3142e-16, 4.5063e-18, 6.4851e-17, 3.6926e-15,\n 1.6783e-16, 6.7518e-19, 1.4405e-16, 5.6231e-17, 1.8912e-17, 2.5119e-18,\n 7.2154e-18, 2.7460e-17, 5.6308e-18, 2.0401e-16, 5.6240e-16, 1.4199e-19,\n 8.8668e-18, 8.4564e-18, 4.3822e-18, 7.4117e-17, 4.4497e-17, 9.8335e-19,\n 1.2680e-15, 5.6621e-16, 2.0017e-16, 4.9837e-15, 1.1031e-19, 4.0350e-18,\n 5.2562e-16, 4.6491e-18, 5.7179e-19, 1.1734e-18, 1.8932e-18, 4.2116e-19,\n 4.4503e-18, 1.9819e-18, 2.5090e-18, 1.4495e-17, 1.5699e-17, 2.9388e-19,\n 1.1430e-18, 6.1362e-18, 8.6112e-18, 2.0162e-18, 3.2155e-17, 5.1464e-17,\n 3.9484e-16, 1.1176e-14, 3.6511e-17, 8.8419e-18, 2.3350e-17, 2.6393e-15,\n 2.8577e-17, 3.2031e-17, 1.6876e-18, 4.8815e-19, 2.0049e-18, 1.4570e-17,\n 2.7429e-18, 8.5853e-19, 6.4243e-18, 1.4727e-15, 8.8251e-19, 4.1948e-18,\n 1.7220e-19, 5.6173e-16, 8.3776e-20, 1.0069e-17, 5.4046e-17, 1.1857e-18,\n 3.2927e-17, 4.8691e-15, 4.8883e-19, 8.1479e-17, 8.0398e-18, 9.1787e-17,\n 3.4162e-15, 6.5067e-17, 2.7724e-15, 5.7912e-19, 7.4816e-17, 1.3300e-18,\n 2.3198e-17, 5.6027e-17, 1.6750e-17, 4.9967e-18, 2.4146e-16, 7.5002e-19,\n 5.5345e-17, 3.9196e-16, 1.0202e-16, 7.8617e-17, 9.3379e-18, 4.1571e-17,\n 4.7888e-17, 3.8280e-18, 2.8752e-17, 7.0185e-17, 3.1501e-16, 1.0031e-17,\n 4.0602e-17, 2.8345e-16, 2.6716e-17, 2.5710e-14, 5.3061e-20, 1.1050e-17,\n 1.1397e-18, 3.3643e-17, 4.7332e-18, 3.4064e-16, 6.3199e-18, 1.0741e-16,\n 2.3375e-17, 1.3692e-18, 3.0769e-18, 3.0508e-17, 3.1796e-18, 3.9432e-15,\n 9.7325e-18, 1.0825e-18, 2.2325e-18, 6.5372e-15, 2.4424e-17, 2.3105e-17,\n 1.1648e-17, 3.8382e-15, 1.0482e-14, 1.5144e-17, 2.1330e-18, 1.4348e-15,\n 2.8595e-19, 1.2711e-16, 1.2713e-17, 4.6559e-19, 2.9157e-16, 9.1619e-16,\n 4.7760e-20, 4.4354e-18, 2.0614e-16, 4.9179e-17, 4.4801e-16, 1.2655e-15,\n 4.6405e-18, 6.3162e-18, 4.9976e-17, 4.0665e-18, 3.3186e-16, 2.6987e-19,\n 1.3521e-17, 1.9376e-16, 1.4774e-15, 4.5247e-17, 1.7293e-16, 8.1456e-18,\n 1.2930e-15, 4.6792e-19, 4.3905e-16, 2.4648e-16, 2.4895e-18, 4.9321e-20,\n 5.2966e-18, 9.3272e-17, 2.0392e-18, 5.6909e-15], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.1594e-16, 6.3709e-18, 3.4629e-17, 5.8975e-17, 9.4847e-18, 6.5879e-18,\n 4.1838e-17, 2.9973e-18, 1.7273e-17, 1.8882e-18, 1.0192e-16, 3.9006e-16,\n 6.0760e-17, 8.7087e-17, 1.0156e-16, 3.9695e-16, 5.3350e-17, 3.4649e-17,\n 1.1395e-16, 1.7387e-16, 1.9591e-21, 2.0422e-17, 4.2309e-17, 5.0029e-17,\n 4.8824e-17, 6.1709e-17, 6.5981e-17, 1.5548e-17, 2.6873e-18, 1.5069e-17,\n 1.1607e-16, 7.2955e-17, 1.4006e-17, 4.7353e-16, 2.0436e-16, 1.3106e-16,\n 1.4808e-18, 3.2038e-20, 5.4041e-17, 1.4274e-15, 2.0420e-16, 4.1488e-17,\n 1.4002e-17, 9.5069e-18, 2.3536e-16, 4.1782e-16, 8.5282e-17, 1.8586e-16,\n 1.3893e-17, 1.1486e-16, 1.1042e-18, 2.9790e-16, 5.8786e-17, 5.5226e-17,\n 6.5900e-18, 2.1754e-18, 1.3284e-16, 2.9469e-18, 9.5531e-17, 1.0006e-17,\n 3.0503e-17, 9.7942e-17, 2.1794e-16, 1.1934e-16, 1.2467e-18, 5.3733e-18,\n 9.0853e-17, 2.8363e-18, 5.6179e-17, 1.1964e-17, 1.1195e-18, 5.9204e-17,\n 1.7447e-16, 2.0788e-17, 3.2480e-17, 6.2335e-19, 3.9890e-18, 1.8620e-16,\n 5.7671e-18, 8.0896e-20, 1.1180e-16, 1.3602e-16, 2.9164e-19, 1.4598e-17,\n 8.5917e-17, 3.3281e-16, 2.8065e-17, 1.1573e-16, 1.8091e-17, 1.9341e-16,\n 1.4336e-16, 8.5172e-17, 1.7015e-16, 2.8714e-17, 6.8639e-17, 7.0148e-18,\n 2.6703e-17, 2.0585e-16, 6.4629e-17, 4.5444e-16, 4.3141e-16, 1.4676e-17,\n 4.2857e-19, 4.1333e-17, 5.9730e-19, 3.2327e-16, 1.8190e-17, 3.1188e-17,\n 6.1540e-17, 1.2090e-20, 3.4073e-17, 3.4360e-19, 2.6019e-16, 2.1781e-16,\n 1.5363e-17, 1.8626e-16, 2.2174e-17, 2.7545e-17, 5.8608e-17, 5.5944e-17,\n 1.4683e-17, 3.4881e-18, 4.0410e-18, 3.8780e-20, 2.3134e-18, 3.7488e-17,\n 4.2756e-16, 7.6895e-17, 1.8526e-16, 6.2011e-17, 1.3867e-21, 4.8889e-16,\n 9.2814e-17, 8.1446e-17, 1.5054e-16, 8.8420e-17, 1.2513e-16, 1.4201e-16,\n 1.1491e-16, 1.1934e-20, 5.6793e-18, 1.0030e-17, 3.5685e-17, 4.6214e-16,\n 1.0614e-16, 8.6134e-18, 1.0864e-16, 1.5133e-17, 8.1832e-17, 9.9201e-19,\n 7.0108e-18, 1.3900e-16, 1.3633e-16, 1.2467e-16, 2.4343e-17, 9.7737e-18,\n 7.3204e-17, 2.5855e-16, 3.8400e-17, 7.2188e-17, 6.8602e-17, 7.5311e-17,\n 3.2638e-17, 6.8834e-17, 6.7822e-17, 4.6234e-17, 3.7402e-16, 2.7144e-16,\n 5.5349e-17, 1.9282e-16, 1.0714e-17, 1.0565e-17, 2.2891e-16, 2.8199e-17,\n 1.5338e-16, 2.0948e-17, 3.7753e-18, 2.0819e-17, 1.1354e-17, 2.4796e-17,\n 1.7205e-17, 1.4515e-17, 4.8483e-18, 4.0225e-17, 7.8064e-16, 3.8786e-19,\n 3.2985e-17, 2.5956e-17, 1.6881e-17, 2.4636e-16, 6.4311e-17, 6.6687e-18,\n 4.1836e-17, 6.1498e-17, 1.4265e-18, 4.6944e-16, 1.5430e-17, 2.2867e-18,\n 1.2620e-16, 4.4939e-16, 3.6494e-19, 5.3873e-17, 2.0132e-16, 3.5722e-17,\n 5.3165e-17, 3.3013e-16, 7.0412e-17, 1.3771e-17, 1.4008e-19, 3.6991e-16,\n 4.5105e-18, 4.0964e-16, 6.7923e-19, 9.9218e-17, 2.6851e-19, 1.0043e-16,\n 1.3186e-16, 3.4303e-19, 1.7906e-17, 1.4321e-17, 8.0436e-18, 5.1525e-16,\n 3.2622e-17, 1.2054e-16, 3.0650e-17, 3.4348e-18, 5.7834e-21, 4.4588e-18,\n 1.1538e-17, 3.8917e-17, 5.0390e-18, 6.1011e-20, 2.0174e-16, 1.6265e-17,\n 1.1685e-17, 7.7484e-17, 4.1447e-17, 9.4864e-17, 4.3374e-17, 1.9788e-17,\n 9.2316e-18, 2.4634e-19, 7.9269e-17, 3.8906e-17, 1.6712e-17, 1.9543e-19,\n 8.1177e-20, 2.9601e-16, 9.4383e-18, 4.0675e-16, 1.0554e-16, 9.2774e-18,\n 1.5460e-16, 4.2768e-17, 1.0572e-16, 2.9305e-17], device='cuda:0')" }, "30": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.3846e-18, 2.8094e-15, 3.2838e-16, 4.9867e-16, 1.3020e-16, 1.6672e-17,\n 2.9074e-16, 1.4463e-15, 8.5073e-16, 3.5179e-17, 2.3994e-17, 3.8845e-17,\n 1.1397e-16, 2.1295e-16, 4.9779e-17, 1.8437e-17, 1.8781e-18, 2.1514e-16,\n 1.0923e-16, 1.1096e-15, 8.7530e-18, 4.8110e-16, 2.0432e-15, 6.6615e-16,\n 9.2343e-16, 7.6758e-18, 2.8053e-17, 2.4960e-15, 5.1755e-20, 6.0641e-18,\n 1.0977e-17, 6.4385e-16, 3.5785e-16, 5.5983e-16, 8.9209e-18, 2.5206e-16,\n 1.0844e-16, 2.3962e-17, 2.9529e-16, 1.9524e-17, 1.1466e-16, 2.4473e-16,\n 2.0975e-15, 2.5837e-16, 1.3429e-16, 2.4568e-16, 3.9403e-17, 2.7124e-17,\n 1.3170e-18, 3.0317e-16, 2.2642e-16, 4.6309e-15, 5.2170e-17, 4.1307e-17,\n 2.8201e-17, 9.1487e-16, 1.0093e-14, 2.2274e-15, 1.2482e-16, 2.6024e-16,\n 1.8791e-15, 7.6742e-17, 2.7850e-16, 3.6573e-15, 1.0197e-16, 1.5283e-15,\n 1.1744e-16, 2.2495e-16, 3.8156e-15, 8.2725e-16, 4.1659e-19, 2.5870e-17,\n 5.1021e-17, 1.8761e-16, 4.2636e-17, 3.9183e-18, 4.5670e-18, 1.8876e-19,\n 3.2832e-16, 4.5649e-17, 1.2899e-18, 7.0871e-16, 1.1356e-16, 9.5399e-16,\n 7.0322e-18, 2.6088e-16, 5.3097e-16, 4.3089e-18, 2.4704e-15, 7.7990e-18,\n 6.0764e-16, 1.9726e-16, 4.9130e-16, 9.5034e-17, 4.4494e-16, 4.2320e-15,\n 1.1210e-16, 1.9724e-19, 3.6165e-16, 1.8349e-16, 1.0907e-17, 5.4050e-17,\n 4.7700e-17, 2.3883e-16, 2.0780e-18, 2.8974e-16, 4.4536e-16, 2.5481e-18,\n 1.1439e-16, 1.8086e-18, 1.9742e-18, 8.3107e-17, 4.3147e-17, 2.3182e-17,\n 3.0196e-16, 7.1933e-16, 4.5704e-16, 2.7119e-15, 4.2938e-18, 1.7621e-17,\n 9.8508e-16, 7.6363e-19, 5.8139e-18, 6.2498e-19, 2.4389e-17, 6.9284e-18,\n 2.5377e-16, 2.0053e-17, 2.1981e-17, 1.5739e-16, 1.3125e-16, 3.3563e-18,\n 2.9498e-17, 1.6788e-18, 2.3538e-17, 1.1672e-18, 1.3038e-16, 7.9860e-17,\n 3.2468e-16, 4.9283e-15, 3.4270e-16, 8.5299e-19, 2.4049e-16, 7.5416e-15,\n 1.3592e-16, 1.3380e-17, 3.3121e-17, 3.2882e-17, 3.0503e-17, 7.2119e-17,\n 6.3419e-19, 1.4388e-17, 2.1060e-18, 7.1307e-16, 4.4778e-18, 6.3666e-19,\n 2.0808e-20, 1.1900e-15, 1.5918e-17, 1.6966e-16, 6.1122e-17, 9.7123e-20,\n 1.3619e-17, 1.6561e-15, 1.5360e-17, 1.1739e-15, 5.7872e-18, 2.7750e-16,\n 2.9321e-15, 4.6315e-17, 1.3481e-15, 1.8121e-17, 1.9784e-16, 4.0244e-19,\n 1.1735e-17, 1.6398e-17, 1.0860e-16, 1.4199e-18, 1.2192e-15, 3.4108e-17,\n 6.2278e-16, 3.3472e-16, 2.5800e-17, 3.7648e-17, 3.0986e-18, 1.2504e-16,\n 1.0090e-16, 2.3276e-18, 1.3167e-16, 2.0254e-16, 9.1090e-16, 1.4848e-20,\n 3.1687e-16, 5.1971e-16, 2.7491e-16, 1.1838e-14, 2.3375e-19, 2.2197e-16,\n 4.1848e-17, 1.9919e-16, 6.2471e-17, 4.2102e-16, 6.5650e-17, 1.3115e-16,\n 8.9960e-17, 1.1228e-16, 2.2312e-18, 2.4441e-16, 3.9189e-18, 2.4879e-15,\n 3.5576e-18, 6.2130e-19, 6.5053e-19, 2.4911e-15, 8.0473e-16, 1.3431e-16,\n 1.5378e-16, 2.3687e-15, 7.3690e-15, 1.0455e-17, 8.7289e-18, 7.0679e-16,\n 3.9024e-18, 3.8065e-16, 9.5626e-17, 3.0667e-18, 2.0447e-15, 1.6201e-15,\n 1.4861e-18, 1.2593e-17, 1.4457e-15, 1.0875e-16, 1.4397e-16, 2.9357e-16,\n 2.6132e-17, 8.7299e-18, 1.6471e-16, 4.4829e-19, 4.8156e-16, 1.3674e-17,\n 1.1968e-18, 2.9490e-16, 1.4233e-15, 1.3658e-16, 2.8618e-16, 8.3410e-17,\n 2.0372e-15, 2.4162e-17, 8.7511e-16, 1.6143e-16, 2.6480e-17, 7.1423e-20,\n 5.5956e-17, 1.8514e-15, 3.8546e-17, 4.0383e-15], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0676e-17, 3.6054e-17, 0.0000e+00, ..., 1.2403e-16, 7.0575e-17,\n 7.5448e-17],\n [1.7054e-17, 6.3699e-18, 0.0000e+00, ..., 5.3276e-18, 6.1709e-17,\n 2.7827e-18],\n [1.3890e-17, 2.4511e-17, 0.0000e+00, ..., 1.7050e-17, 1.1566e-16,\n 6.8384e-18],\n ...,\n [3.3581e-18, 9.1424e-18, 0.0000e+00, ..., 1.1628e-17, 1.7283e-17,\n 1.0669e-19],\n [7.6675e-17, 6.8161e-17, 0.0000e+00, ..., 9.9116e-17, 4.2279e-16,\n 8.6190e-17],\n [6.1900e-19, 8.2765e-19, 0.0000e+00, ..., 3.1924e-18, 7.4659e-18,\n 7.7689e-20]], device='cuda:0')" }, "31": { - "step": "tensor(15024.)", - "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.2296e-15, 1.2808e-15, 6.4745e-19, ..., 8.9767e-17, 1.4683e-16,\n 4.2670e-17],\n [1.6580e-17, 2.1420e-17, 3.8502e-20, ..., 1.5573e-18, 1.9412e-18,\n 4.1946e-19],\n [1.0869e-15, 1.1964e-15, 4.7284e-19, ..., 9.5644e-17, 1.1532e-16,\n 4.1609e-17],\n ...,\n [7.8089e-15, 8.7674e-15, 1.7484e-19, ..., 7.0895e-16, 8.7702e-16,\n 3.9827e-16],\n [5.7711e-16, 6.6082e-16, 1.4115e-18, ..., 4.6321e-17, 7.9577e-17,\n 2.3436e-17],\n [6.6505e-18, 7.0099e-18, 1.2129e-21, ..., 4.6185e-19, 5.5965e-19,\n 1.2744e-19]], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.6154e-14, 2.0564e-14, 2.6147e-14, 8.1373e-16, 7.3936e-15, 7.8207e-16,\n 8.0214e-15, 6.8599e-16, 4.6872e-15, 3.0042e-15, 6.1715e-14, 6.0831e-14,\n 8.3488e-15, 6.0271e-15, 1.1793e-14, 1.5954e-15, 2.9951e-14, 8.9095e-15,\n 2.9918e-14, 8.3297e-14, 1.3701e-16, 2.3878e-14, 5.2615e-15, 4.8881e-14,\n 3.6054e-15, 7.9175e-15, 5.2351e-14, 8.4631e-16, 2.9724e-15, 9.0759e-15,\n 1.5744e-14, 5.1826e-14, 6.8491e-17, 9.3274e-14, 5.3666e-14, 6.3709e-14,\n 2.4165e-15, 8.6129e-17, 3.4620e-14, 2.2859e-13, 9.6071e-14, 2.7054e-15,\n 2.4588e-15, 2.4784e-15, 1.7470e-14, 3.9879e-14, 2.4305e-14, 1.1233e-14,\n 2.4467e-15, 3.3792e-14, 9.0572e-16, 8.4621e-14, 4.2412e-14, 2.8258e-14,\n 1.2509e-15, 2.5979e-15, 1.9018e-14, 2.2250e-15, 1.4100e-14, 4.4581e-15,\n 5.4476e-15, 1.4099e-14, 2.3208e-14, 5.7086e-14, 5.9805e-16, 1.4229e-14,\n 1.3704e-14, 3.3132e-15, 2.7300e-14, 3.9634e-15, 2.7796e-17, 1.1045e-14,\n 4.5328e-14, 4.4176e-15, 2.7051e-15, 3.5016e-17, 7.1107e-15, 7.9555e-14,\n 3.1128e-15, 2.2339e-17, 3.2416e-14, 1.0352e-13, 4.6268e-17, 7.6142e-16,\n 1.2857e-14, 7.6638e-14, 3.4602e-15, 1.5719e-14, 1.1499e-14, 2.9435e-15,\n 1.6284e-14, 5.8714e-14, 1.7885e-14, 2.0751e-14, 6.6545e-14, 1.6586e-14,\n 2.9410e-14, 9.9784e-14, 1.5554e-14, 1.2070e-13, 1.1244e-14, 2.3027e-15,\n 4.4862e-18, 1.1906e-14, 1.3069e-16, 1.4388e-13, 4.1184e-15, 1.6276e-14,\n 1.2376e-14, 8.6030e-17, 1.0909e-13, 4.8258e-15, 3.3888e-14, 5.1034e-14,\n 8.9168e-15, 7.1054e-14, 1.5316e-15, 5.6420e-15, 8.8272e-15, 4.7374e-14,\n 4.9405e-15, 3.7828e-15, 2.1061e-15, 3.5367e-17, 3.5010e-16, 2.5454e-14,\n 4.2596e-14, 6.2755e-15, 2.1109e-14, 4.3260e-14, 4.7233e-16, 9.6995e-14,\n 2.3687e-14, 3.5462e-15, 4.1060e-14, 1.5534e-14, 7.7728e-14, 1.5062e-14,\n 1.1869e-14, 8.5395e-18, 3.0269e-17, 2.9258e-16, 5.3232e-14, 1.9736e-13,\n 3.4558e-14, 6.8037e-15, 8.3490e-15, 1.3209e-14, 1.1664e-14, 9.9676e-16,\n 3.0516e-15, 2.7034e-14, 1.9605e-14, 4.7982e-15, 5.8723e-15, 3.4735e-15,\n 9.3409e-15, 1.0513e-13, 4.6520e-15, 5.0394e-15, 1.6654e-14, 5.2136e-15,\n 4.9644e-15, 1.4873e-14, 4.7205e-15, 2.2151e-14, 1.0157e-13, 7.3484e-15,\n 3.4690e-14, 6.2899e-15, 3.1883e-16, 1.1519e-14, 2.1290e-14, 3.3303e-15,\n 4.3091e-14, 7.3254e-15, 5.0866e-15, 4.8584e-15, 5.4157e-15, 1.4642e-14,\n 1.7681e-14, 5.2752e-15, 7.8925e-15, 1.3107e-14, 1.1581e-13, 6.7681e-17,\n 9.0804e-15, 3.4134e-14, 6.0614e-15, 1.1123e-13, 1.9323e-14, 1.1377e-14,\n 6.5414e-15, 2.2810e-15, 1.0007e-17, 3.0845e-13, 4.3098e-15, 9.2001e-16,\n 9.0408e-15, 4.2296e-14, 4.8545e-17, 1.2548e-13, 8.6491e-15, 1.0793e-14,\n 1.0604e-14, 2.9138e-14, 5.2898e-14, 1.0772e-15, 2.4636e-17, 2.0193e-14,\n 5.8333e-15, 1.4829e-14, 5.5264e-16, 3.6905e-14, 7.3085e-16, 8.0365e-14,\n 9.3248e-14, 3.6704e-16, 9.1831e-16, 9.3420e-15, 3.8052e-15, 5.2066e-14,\n 4.8419e-15, 6.3040e-14, 8.5026e-15, 1.1672e-18, 1.0812e-17, 8.3362e-16,\n 1.2938e-15, 1.3625e-14, 3.5862e-16, 1.2097e-15, 1.4076e-14, 8.1658e-15,\n 5.5725e-14, 4.2891e-16, 3.9163e-15, 2.6089e-14, 7.2887e-15, 8.5381e-15,\n 1.5483e-14, 6.3138e-17, 6.1306e-15, 2.1873e-14, 8.9879e-15, 2.1686e-16,\n 5.0540e-16, 4.1776e-14, 2.8954e-15, 9.3492e-15, 7.5187e-14, 1.1517e-15,\n 4.6892e-14, 4.1462e-15, 1.1164e-13, 2.6702e-15], device='cuda:0')" }, "32": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([6.4221e-13, 7.7337e-15, 6.1741e-13, 2.4874e-14, 5.2905e-16, 8.4595e-15,\n 1.2429e-13, 1.2047e-12, 4.4880e-13, 2.5413e-14, 4.4566e-12, 3.4839e-14,\n 2.2756e-16, 2.1623e-16, 3.4475e-15, 8.3146e-14, 5.1822e-15, 4.0334e-13,\n 6.6921e-13, 2.5209e-13, 4.5352e-13, 1.2257e-14, 4.0581e-14, 1.7765e-12,\n 1.1764e-14, 1.5690e-13, 5.5795e-13, 1.0063e-14, 7.5946e-16, 2.1434e-13,\n 8.6201e-15, 7.5204e-13, 1.9481e-13, 1.6155e-13, 4.7770e-14, 5.0507e-13,\n 1.1978e-14, 6.4001e-14, 4.5925e-13, 3.4204e-14, 1.5930e-14, 1.3127e-13,\n 4.4159e-13, 1.1119e-14, 8.0377e-16, 4.1409e-15, 2.0828e-13, 3.3606e-15,\n 4.2797e-13, 3.8949e-13, 6.5921e-14, 3.9427e-13, 5.2108e-15, 1.9547e-13,\n 7.3362e-14, 5.6423e-13, 4.4511e-13, 1.6641e-12, 8.8724e-14, 3.1274e-13,\n 5.9542e-12, 4.2729e-15, 5.9328e-15, 5.6439e-13, 7.0960e-15, 1.1786e-12,\n 1.4174e-14, 1.1734e-14, 9.6787e-16, 2.2615e-13, 3.2768e-13, 4.7574e-13,\n 3.9286e-13, 1.0667e-13, 2.6766e-15, 2.7632e-15, 5.1195e-14, 3.6910e-14,\n 4.5227e-13, 1.5065e-14, 2.5423e-16, 1.9467e-12, 1.6451e-12, 4.8392e-16,\n 1.4539e-13, 4.2505e-14, 6.7664e-13, 2.0540e-13, 1.9668e-12, 8.3058e-15,\n 5.1587e-13, 2.3732e-13, 2.9755e-14, 9.6717e-15, 8.4468e-14, 1.4554e-12,\n 2.3675e-13, 3.0444e-15, 1.0937e-12, 8.2439e-14, 5.7917e-13, 1.5453e-15,\n 2.7375e-14, 1.3465e-13, 2.3417e-14, 1.1812e-13, 5.9510e-14, 7.3777e-16,\n 7.9890e-14, 4.9017e-15, 3.1662e-13, 3.0551e-18, 3.0688e-15, 1.4596e-15,\n 9.6637e-13, 1.5414e-13, 3.6410e-13, 8.1916e-13, 1.5408e-14, 1.9094e-12,\n 8.5235e-14, 7.7461e-14, 2.1076e-13, 9.7326e-16, 1.7841e-14, 7.1810e-15,\n 7.2015e-15, 1.1077e-13, 2.1789e-14, 8.6079e-15, 5.2915e-15, 1.5034e-13,\n 6.0234e-15, 1.0081e-16, 2.6516e-15, 1.5688e-14, 5.2218e-15, 2.1510e-14,\n 1.1153e-13, 1.8616e-12, 3.6012e-13, 2.1591e-14, 2.3548e-12, 1.2864e-11,\n 9.6046e-13, 3.2926e-14, 1.1763e-14, 4.7288e-13, 2.6823e-16, 4.4839e-14,\n 5.1900e-14, 2.1202e-15, 1.6256e-14, 4.4279e-13, 4.0739e-13, 1.5773e-13,\n 5.2365e-14, 1.0553e-12, 1.2832e-14, 7.2878e-14, 6.5013e-15, 1.0540e-15,\n 1.9983e-13, 4.4671e-13, 4.8005e-15, 2.9704e-13, 3.2646e-13, 1.0681e-14,\n 1.2109e-12, 3.7161e-15, 1.7241e-12, 4.5515e-13, 3.9857e-13, 8.8506e-16,\n 1.6398e-14, 4.5639e-14, 1.8121e-13, 7.2122e-13, 1.0884e-12, 2.2295e-15,\n 2.9116e-12, 2.3288e-15, 2.1898e-12, 2.3225e-14, 1.4054e-14, 1.4203e-13,\n 6.9231e-15, 1.2535e-14, 1.7073e-13, 3.7662e-14, 6.5743e-14, 2.1637e-14,\n 9.6043e-13, 4.5565e-13, 5.9462e-14, 8.6197e-12, 2.9284e-14, 2.5177e-15,\n 1.6570e-14, 2.2068e-14, 1.3692e-12, 6.9864e-15, 2.7391e-13, 3.4741e-15,\n 1.3459e-14, 2.6235e-13, 8.0671e-15, 9.2974e-13, 8.1771e-15, 3.3491e-12,\n 2.2958e-15, 3.1953e-13, 7.0044e-15, 8.5794e-13, 4.9416e-14, 1.5238e-14,\n 6.6371e-16, 3.5586e-14, 5.7262e-12, 4.7291e-13, 7.6013e-14, 5.0635e-15,\n 1.7732e-15, 8.7317e-14, 4.2224e-14, 1.1582e-13, 1.4911e-12, 6.3209e-14,\n 1.9771e-14, 1.5980e-14, 1.1118e-14, 5.1582e-14, 4.3680e-15, 2.5653e-13,\n 6.9015e-14, 3.5288e-13, 3.4422e-14, 1.9577e-15, 1.9416e-13, 5.3469e-13,\n 7.9815e-13, 5.6838e-15, 4.7495e-13, 3.3295e-13, 3.1126e-15, 4.7497e-15,\n 5.3169e-13, 1.5209e-13, 7.4595e-14, 8.1333e-15, 4.1117e-13, 1.4948e-15,\n 3.5300e-13, 5.0369e-12, 3.7174e-13, 3.3428e-15], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.4259e-16, 6.5911e-17, 7.3037e-17, 4.3227e-18, 1.6377e-17, 2.3216e-18,\n 1.1700e-17, 5.5828e-18, 6.3955e-18, 7.2108e-18, 1.1362e-16, 1.0002e-16,\n 1.7280e-17, 1.5273e-17, 3.8940e-17, 1.1585e-17, 7.1475e-17, 1.5505e-17,\n 7.3203e-17, 1.9807e-16, 6.5923e-20, 8.3825e-17, 2.5077e-17, 1.0296e-16,\n 1.2102e-17, 9.1624e-18, 2.4999e-16, 2.6057e-18, 1.0338e-17, 1.7420e-17,\n 2.3821e-17, 3.7749e-16, 6.4407e-19, 1.5948e-16, 1.5960e-16, 2.4263e-16,\n 3.0261e-18, 9.7576e-20, 1.0100e-16, 5.7861e-16, 2.8413e-16, 4.4994e-18,\n 6.5514e-18, 1.1282e-17, 2.2649e-17, 7.1526e-17, 1.0026e-16, 2.7049e-17,\n 3.1228e-18, 7.8475e-17, 9.8918e-19, 2.5925e-16, 8.8145e-17, 6.1553e-17,\n 1.4637e-18, 1.8525e-17, 4.0974e-17, 2.4622e-18, 7.1383e-17, 1.5372e-17,\n 1.2528e-17, 5.7826e-17, 7.3681e-17, 2.5635e-16, 7.2101e-19, 4.1407e-17,\n 3.7340e-17, 5.3127e-18, 6.3409e-17, 8.5927e-18, 8.0513e-21, 4.0280e-17,\n 1.1587e-16, 1.4607e-17, 4.0896e-18, 1.3241e-19, 5.0548e-17, 2.3125e-16,\n 6.4817e-18, 1.1965e-21, 1.8923e-16, 5.6976e-16, 2.8097e-19, 4.2766e-18,\n 5.7852e-17, 2.2380e-16, 1.3249e-17, 6.1336e-17, 2.4493e-17, 1.5458e-17,\n 4.4314e-17, 1.4069e-16, 2.5899e-17, 5.4800e-17, 2.2748e-16, 3.3094e-17,\n 7.8661e-17, 2.2839e-16, 3.9647e-17, 3.4962e-16, 2.4861e-17, 1.3954e-17,\n 6.4011e-19, 2.2807e-17, 6.6339e-20, 4.4298e-16, 8.1828e-18, 8.2479e-17,\n 1.9890e-17, 3.1645e-23, 3.3412e-16, 1.4209e-17, 8.5395e-17, 8.8819e-17,\n 1.5877e-17, 2.2321e-16, 4.7933e-18, 9.2316e-18, 1.7585e-17, 2.0476e-16,\n 6.0206e-18, 5.4200e-18, 2.2346e-18, 1.6547e-18, 3.2496e-18, 8.4267e-17,\n 7.6272e-17, 1.5789e-17, 6.4965e-17, 1.6310e-16, 2.4614e-20, 1.8204e-16,\n 9.1401e-17, 1.0855e-17, 6.8055e-17, 2.2355e-17, 2.6527e-16, 3.3980e-17,\n 3.0063e-17, 1.3150e-21, 1.2437e-20, 1.4400e-18, 1.2792e-16, 8.1679e-16,\n 1.1977e-16, 2.5867e-17, 2.0490e-17, 7.8877e-17, 1.3806e-17, 9.9341e-19,\n 2.0007e-17, 5.5198e-17, 7.7579e-17, 7.8272e-18, 7.7430e-18, 2.7757e-17,\n 1.6530e-17, 2.6710e-16, 7.0282e-18, 2.1938e-17, 3.6907e-17, 7.7515e-18,\n 6.0079e-18, 5.0688e-17, 1.1322e-17, 4.8096e-17, 1.9036e-16, 1.4081e-17,\n 1.6703e-16, 2.0717e-17, 1.0045e-19, 2.1620e-17, 3.4590e-17, 1.0165e-17,\n 1.2231e-16, 5.0783e-17, 7.4929e-18, 1.5681e-17, 1.1314e-17, 3.1306e-17,\n 4.0050e-17, 8.1032e-18, 1.6837e-17, 2.1478e-17, 2.6888e-16, 4.3670e-21,\n 1.5305e-17, 1.2451e-16, 6.9339e-18, 2.4267e-16, 3.2524e-17, 2.8009e-17,\n 2.5310e-17, 7.1692e-18, 2.5817e-20, 1.2637e-15, 4.6298e-18, 6.8174e-19,\n 1.8866e-17, 8.1064e-17, 1.6727e-18, 5.5100e-16, 1.5175e-17, 2.9791e-17,\n 2.1147e-17, 6.5037e-17, 1.3646e-16, 1.8930e-18, 1.7774e-21, 3.5141e-17,\n 1.2031e-17, 3.3597e-17, 1.7441e-18, 1.2925e-16, 5.2775e-19, 2.3294e-16,\n 2.1972e-16, 4.4473e-20, 2.5561e-18, 2.0024e-17, 7.4144e-18, 1.2657e-16,\n 1.6367e-17, 1.3406e-16, 2.3299e-17, 1.0584e-19, 7.8485e-20, 3.0498e-18,\n 1.8382e-18, 4.1135e-17, 1.0738e-18, 6.9520e-19, 4.8712e-17, 2.8567e-17,\n 1.7565e-16, 2.9091e-18, 5.7866e-18, 5.5722e-17, 3.1143e-17, 1.5638e-17,\n 3.0413e-17, 3.4371e-20, 1.7405e-17, 6.2354e-17, 1.4697e-17, 1.7127e-20,\n 2.1721e-19, 6.8351e-17, 4.5753e-18, 3.1942e-17, 2.2111e-16, 1.0766e-18,\n 1.6814e-16, 7.8136e-18, 2.9772e-16, 8.4163e-18], device='cuda:0')" }, "33": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.3278e-17, 2.7436e-17, 2.2045e-16, 1.6220e-17, 5.7939e-17, 3.7107e-17,\n 5.8951e-18, 1.1106e-15, 5.1795e-17, 3.4918e-17, 6.7086e-15, 1.5157e-17,\n 1.1606e-17, 6.6293e-19, 7.1280e-17, 2.3591e-18, 4.4711e-18, 1.1429e-15,\n 3.6802e-16, 1.5687e-16, 2.8802e-16, 4.4273e-17, 2.0672e-18, 3.4288e-15,\n 3.2028e-18, 4.4386e-17, 3.2280e-16, 5.9490e-18, 6.7064e-18, 2.9815e-16,\n 1.4904e-18, 1.4463e-15, 2.5103e-17, 1.8634e-16, 6.9148e-18, 2.8783e-15,\n 5.9503e-18, 1.1281e-17, 2.6049e-16, 1.0174e-18, 6.9827e-18, 2.3828e-17,\n 1.6300e-16, 2.1701e-16, 9.3421e-19, 3.2619e-19, 7.0603e-17, 5.6141e-20,\n 1.4274e-15, 1.2483e-16, 4.0460e-17, 1.7992e-17, 2.2495e-17, 5.0133e-17,\n 7.5038e-17, 1.1743e-16, 5.5124e-17, 7.2048e-16, 2.0423e-17, 1.5641e-16,\n 7.4798e-15, 1.2932e-18, 2.0393e-17, 4.3031e-17, 1.0317e-18, 6.8231e-16,\n 7.4686e-19, 1.2302e-18, 3.0741e-17, 5.5399e-17, 1.6572e-16, 2.1647e-17,\n 3.5613e-17, 2.8366e-16, 1.7813e-17, 7.1022e-19, 6.9137e-19, 5.6563e-18,\n 1.2594e-16, 2.8112e-16, 4.3230e-20, 5.7095e-15, 1.2968e-15, 1.0679e-17,\n 6.7722e-17, 4.4802e-17, 3.1367e-16, 7.9972e-17, 6.8563e-15, 5.7892e-18,\n 2.4258e-15, 1.6084e-15, 6.5724e-19, 5.2803e-17, 7.2707e-18, 3.3767e-16,\n 8.3710e-16, 6.1834e-18, 4.6350e-16, 3.8035e-17, 6.5461e-16, 4.6639e-18,\n 1.7340e-17, 1.7070e-17, 5.5886e-18, 1.0417e-15, 3.0369e-18, 6.8295e-18,\n 1.3569e-17, 2.0606e-18, 1.3473e-16, 6.1794e-19, 1.3849e-17, 8.1141e-18,\n 1.5454e-15, 7.7205e-16, 3.1937e-16, 2.8449e-16, 3.0631e-18, 2.6066e-15,\n 3.2268e-18, 2.6233e-18, 3.8968e-16, 2.5562e-19, 1.1854e-17, 1.4920e-18,\n 6.4443e-18, 2.2063e-17, 2.3560e-17, 1.3543e-17, 2.8759e-17, 8.0014e-17,\n 3.5317e-18, 8.7987e-20, 1.6447e-18, 3.0441e-18, 1.3283e-17, 7.1035e-18,\n 5.6519e-17, 1.5571e-15, 8.2010e-16, 7.3560e-18, 2.6393e-15, 5.1371e-14,\n 9.8270e-16, 1.0218e-17, 1.4587e-18, 3.0805e-16, 6.0703e-19, 7.5100e-17,\n 2.1108e-16, 2.8969e-18, 2.4505e-17, 3.4580e-16, 6.6374e-17, 1.0295e-15,\n 3.8893e-18, 4.3996e-16, 3.8230e-18, 2.0676e-16, 1.6571e-18, 2.1215e-18,\n 5.4749e-18, 4.5276e-16, 2.7223e-18, 2.7333e-18, 8.0178e-16, 8.8847e-19,\n 1.6479e-15, 2.8886e-18, 3.8029e-15, 1.4073e-16, 5.3536e-16, 4.8278e-19,\n 3.8801e-18, 1.8786e-17, 4.7187e-17, 2.5905e-16, 1.5963e-16, 8.3565e-18,\n 2.0341e-15, 4.8334e-18, 6.2362e-16, 5.8577e-17, 1.3021e-18, 4.5466e-17,\n 3.0644e-18, 1.9109e-17, 3.9393e-16, 1.6665e-18, 3.5295e-18, 5.3028e-18,\n 4.1418e-15, 6.3896e-16, 2.4999e-18, 2.1273e-14, 7.5585e-19, 8.6735e-18,\n 1.7989e-18, 2.7265e-18, 1.0787e-15, 3.3129e-17, 5.5047e-17, 1.3276e-17,\n 1.2314e-17, 5.5283e-17, 7.8173e-17, 4.8068e-16, 4.3228e-18, 1.0516e-14,\n 5.7568e-18, 8.9261e-17, 6.4359e-19, 1.2445e-15, 2.5938e-17, 2.7274e-17,\n 3.4400e-18, 2.2590e-18, 8.8481e-15, 1.3020e-15, 1.4174e-16, 1.2617e-18,\n 1.1694e-19, 1.5296e-16, 1.8987e-17, 2.2893e-17, 2.9925e-16, 7.4168e-18,\n 1.0026e-18, 3.7940e-18, 4.9275e-17, 1.5632e-18, 8.4685e-19, 4.1833e-16,\n 1.2035e-17, 1.0645e-16, 1.0400e-17, 3.8114e-19, 1.2730e-15, 5.4540e-16,\n 1.5462e-15, 1.0169e-18, 4.3533e-17, 1.6090e-16, 1.0936e-18, 1.8904e-19,\n 2.1253e-16, 7.2556e-17, 5.6000e-18, 1.0719e-17, 1.0478e-16, 4.8194e-19,\n 4.4419e-16, 3.8487e-15, 1.7779e-16, 3.5146e-17], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.7348e-16, 9.6423e-17, 7.6621e-17, 4.9783e-18, 1.5991e-17, 6.2173e-18,\n 2.2017e-17, 7.2099e-18, 1.0785e-17, 5.0162e-18, 2.0149e-16, 2.1648e-16,\n 2.5992e-17, 2.8808e-17, 6.1255e-17, 6.6308e-18, 1.3422e-16, 2.2836e-17,\n 9.6211e-17, 3.4730e-16, 5.4338e-20, 1.1073e-16, 3.0956e-17, 2.0540e-16,\n 2.0056e-17, 2.5029e-17, 2.3497e-16, 4.9619e-18, 1.8000e-17, 2.3796e-17,\n 7.4753e-17, 2.4264e-16, 7.2435e-19, 3.1943e-16, 2.2757e-16, 2.8171e-16,\n 5.0859e-18, 1.7514e-19, 1.5429e-16, 8.0050e-16, 3.2164e-16, 1.5932e-17,\n 1.4577e-17, 1.5455e-17, 5.8914e-17, 1.3311e-16, 1.1735e-16, 5.4821e-17,\n 6.4897e-18, 1.1440e-16, 8.5174e-19, 3.5982e-16, 1.4415e-16, 8.5724e-17,\n 2.4885e-18, 1.9860e-17, 8.4446e-17, 5.4680e-18, 7.2829e-17, 2.4430e-17,\n 1.2126e-17, 7.2017e-17, 1.1062e-16, 2.5409e-16, 9.9469e-19, 3.7227e-17,\n 6.8149e-17, 7.1007e-18, 1.1806e-16, 2.0191e-17, 2.0142e-22, 5.5342e-17,\n 1.4624e-16, 2.5095e-17, 7.5073e-18, 6.0859e-19, 4.1199e-17, 3.2882e-16,\n 4.7476e-18, 3.2607e-20, 1.5352e-16, 4.4983e-16, 3.8367e-19, 6.8625e-18,\n 6.5276e-17, 3.2026e-16, 1.9788e-17, 7.7830e-17, 3.7468e-17, 1.2974e-17,\n 7.4914e-17, 2.5941e-16, 5.8220e-17, 6.6163e-17, 2.9029e-16, 4.7756e-17,\n 8.9910e-17, 3.4091e-16, 7.7696e-17, 5.0710e-16, 5.4216e-17, 1.5094e-17,\n 1.3565e-18, 3.3275e-17, 4.0201e-20, 4.8550e-16, 8.4415e-18, 8.1033e-17,\n 3.6199e-17, 4.1919e-20, 3.6725e-16, 2.6236e-17, 1.5266e-16, 1.7547e-16,\n 2.2170e-17, 2.3580e-16, 8.1873e-18, 1.5943e-17, 3.9862e-17, 2.1783e-16,\n 1.5017e-17, 1.0821e-17, 3.9674e-18, 8.7577e-19, 4.0382e-18, 7.4834e-17,\n 1.7923e-16, 3.1623e-17, 9.7739e-17, 1.3037e-16, 1.3285e-19, 3.9741e-16,\n 1.1127e-16, 1.8590e-17, 1.3479e-16, 5.0206e-17, 3.2883e-16, 5.1961e-17,\n 5.5788e-17, 5.3549e-20, 1.1595e-19, 2.5430e-18, 1.6337e-16, 6.6516e-16,\n 1.5183e-16, 3.5979e-17, 3.9792e-17, 7.0711e-17, 3.8740e-17, 1.0569e-18,\n 2.0587e-17, 8.5958e-17, 9.5372e-17, 1.4506e-17, 1.8006e-17, 2.5126e-17,\n 2.6248e-17, 3.5908e-16, 1.3271e-17, 2.2610e-17, 5.5644e-17, 1.5293e-17,\n 1.2206e-17, 6.8351e-17, 2.4269e-17, 6.9393e-17, 3.5767e-16, 2.4301e-17,\n 1.6482e-16, 3.0374e-17, 5.5441e-19, 3.2849e-17, 7.5099e-17, 1.8185e-17,\n 1.3946e-16, 4.4767e-17, 1.2801e-17, 2.7691e-17, 1.1386e-17, 6.6528e-17,\n 4.8440e-17, 1.3887e-17, 1.8916e-17, 3.7661e-17, 4.8914e-16, 3.6136e-20,\n 2.8572e-17, 9.8382e-17, 1.6669e-17, 4.5106e-16, 6.5141e-17, 3.3026e-17,\n 3.3586e-17, 1.4601e-17, 2.4499e-20, 1.2460e-15, 1.0137e-17, 1.6914e-18,\n 4.4807e-17, 1.5170e-16, 2.9378e-18, 5.4018e-16, 3.0657e-17, 2.7229e-17,\n 5.0393e-17, 1.0990e-16, 1.6260e-16, 3.0788e-18, 8.4257e-20, 7.2392e-17,\n 1.2641e-17, 6.2117e-17, 4.1887e-18, 1.6248e-16, 5.9247e-19, 2.6519e-16,\n 3.1563e-16, 5.4415e-20, 5.2105e-18, 2.3349e-17, 5.8262e-18, 2.2707e-16,\n 2.7251e-17, 2.7002e-16, 4.0758e-17, 2.8051e-19, 6.4260e-20, 6.4389e-18,\n 3.0746e-18, 6.5929e-17, 2.3847e-18, 1.4152e-18, 6.3921e-17, 4.2178e-17,\n 2.4201e-16, 2.1984e-18, 9.4234e-18, 1.1707e-16, 3.9860e-17, 2.2604e-17,\n 6.9577e-17, 3.5891e-20, 3.2739e-17, 7.7218e-17, 2.6749e-17, 7.7773e-22,\n 3.0196e-19, 1.3566e-16, 6.4566e-18, 4.2989e-17, 2.5274e-16, 2.3427e-18,\n 1.4006e-16, 1.2870e-17, 3.8810e-16, 1.6438e-17], device='cuda:0')" }, "34": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([7.8299e-16, 1.0243e-17, 8.8198e-16, 2.1514e-17, 5.8652e-17, 1.3489e-17,\n 1.8015e-16, 1.7208e-15, 6.2247e-16, 5.7332e-17, 5.3259e-15, 1.4243e-16,\n 4.0293e-18, 7.5210e-19, 8.2126e-17, 8.1024e-17, 1.3647e-17, 6.5873e-16,\n 7.4507e-16, 5.0389e-16, 7.1152e-16, 1.8175e-17, 3.3576e-17, 2.4743e-15,\n 5.7805e-17, 2.3264e-16, 7.4327e-16, 3.0533e-18, 2.2598e-18, 3.2990e-16,\n 7.0329e-18, 1.0151e-15, 2.8739e-16, 4.6568e-16, 4.1668e-18, 9.6531e-16,\n 4.1948e-17, 2.8275e-17, 6.6480e-16, 4.3684e-18, 4.6283e-17, 2.0290e-16,\n 7.3675e-16, 1.0402e-16, 9.0182e-18, 1.0090e-17, 1.8616e-16, 6.0644e-19,\n 6.3666e-16, 5.0598e-16, 1.0117e-16, 5.4323e-16, 1.9041e-17, 1.4908e-16,\n 1.2401e-16, 6.3753e-16, 5.9789e-16, 2.2515e-15, 9.9865e-17, 4.0525e-16,\n 7.8448e-15, 7.3331e-18, 5.2296e-18, 6.8922e-16, 1.1934e-17, 1.6281e-15,\n 1.6794e-19, 2.1454e-17, 3.2520e-18, 3.3138e-16, 4.6144e-16, 5.0640e-16,\n 5.1949e-16, 3.1689e-16, 2.0721e-17, 3.9696e-19, 7.6461e-17, 2.5782e-17,\n 6.1363e-16, 1.4397e-16, 4.6311e-19, 2.2511e-15, 2.2250e-15, 4.1979e-18,\n 1.4497e-16, 1.0081e-16, 7.8905e-16, 3.0280e-16, 2.9046e-15, 5.9299e-18,\n 9.3932e-16, 5.6563e-16, 1.4672e-17, 1.5286e-17, 1.2869e-16, 1.8500e-15,\n 4.4579e-16, 1.5623e-17, 1.5165e-15, 2.0847e-16, 8.6986e-16, 1.7664e-18,\n 1.2812e-16, 1.6357e-16, 1.2710e-17, 5.2314e-16, 8.8551e-17, 2.0664e-18,\n 1.5642e-16, 1.0731e-18, 4.6202e-16, 1.0504e-19, 2.7230e-17, 3.6792e-18,\n 1.1704e-15, 3.3634e-16, 5.5776e-16, 1.0448e-15, 2.8892e-17, 2.5916e-15,\n 6.5323e-17, 8.2519e-17, 3.5524e-16, 4.8165e-18, 8.0818e-17, 5.2972e-19,\n 2.5702e-18, 5.7692e-17, 9.5114e-18, 4.5542e-18, 1.0230e-17, 7.1027e-17,\n 5.9786e-18, 5.1729e-18, 7.7228e-18, 2.7027e-17, 5.0454e-18, 3.8995e-17,\n 1.7908e-16, 2.5319e-15, 5.7829e-16, 1.5460e-17, 3.0274e-15, 1.5977e-14,\n 9.5481e-16, 4.1851e-17, 5.0015e-17, 6.8819e-16, 7.5785e-19, 6.4964e-17,\n 2.6174e-16, 1.2245e-17, 8.1537e-18, 6.4933e-16, 5.7461e-16, 2.5181e-16,\n 2.2281e-17, 1.4407e-15, 2.3905e-17, 8.1654e-17, 5.1926e-19, 1.2495e-18,\n 2.7551e-16, 6.1936e-16, 7.0358e-19, 4.0528e-16, 5.5133e-16, 2.1948e-17,\n 1.3172e-15, 9.1864e-19, 1.8732e-15, 5.0781e-16, 6.8201e-16, 1.1528e-18,\n 3.4445e-17, 6.5103e-17, 1.5799e-16, 8.7150e-16, 1.2227e-15, 3.1556e-18,\n 3.4558e-15, 3.7945e-19, 2.9291e-15, 6.5364e-17, 2.6790e-17, 2.2530e-16,\n 3.7684e-17, 8.1331e-18, 4.3602e-16, 6.0720e-17, 9.8149e-17, 3.1167e-17,\n 1.5980e-15, 6.4693e-16, 4.5165e-17, 1.1393e-14, 1.0516e-18, 4.7478e-18,\n 2.8384e-17, 1.3781e-17, 1.4743e-15, 5.2322e-17, 2.5461e-16, 4.5801e-17,\n 1.1123e-17, 3.8151e-16, 3.5640e-17, 1.2736e-15, 2.4621e-17, 4.2167e-15,\n 2.3139e-19, 3.4038e-16, 3.0745e-18, 1.1846e-15, 6.6677e-17, 9.0983e-18,\n 1.1501e-18, 5.5186e-17, 7.6063e-15, 7.9544e-16, 2.0818e-16, 3.0518e-17,\n 5.3015e-18, 1.3305e-16, 1.3885e-16, 9.6504e-17, 2.0037e-15, 1.2617e-16,\n 3.4730e-17, 3.5107e-18, 1.4205e-17, 7.9692e-17, 4.2116e-19, 4.8474e-16,\n 3.1989e-17, 5.2311e-16, 1.3225e-16, 4.7012e-18, 6.4900e-16, 4.9920e-16,\n 1.1678e-15, 1.1876e-17, 5.3274e-16, 5.1506e-16, 2.4728e-17, 9.8327e-18,\n 7.4854e-16, 2.3551e-16, 1.0936e-16, 4.9876e-17, 5.8796e-16, 1.3995e-19,\n 3.9734e-16, 6.6411e-15, 5.2545e-16, 1.0373e-17], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.7477e-19, 3.5198e-18, 2.1473e-18, ..., 4.8599e-19, 2.6609e-18,\n 1.5990e-18],\n [6.9379e-20, 1.0379e-20, 1.1313e-19, ..., 3.0189e-19, 3.5095e-19,\n 1.2508e-20],\n [3.3818e-19, 1.4873e-18, 4.6898e-19, ..., 2.3998e-19, 1.4451e-18,\n 2.7652e-19],\n ...,\n [8.4623e-18, 2.0073e-17, 2.0802e-17, ..., 3.6964e-18, 3.1210e-17,\n 4.5169e-17],\n [5.6508e-17, 8.9377e-17, 1.3795e-16, ..., 3.0633e-17, 1.3497e-16,\n 2.4781e-16],\n [7.7529e-16, 1.7197e-15, 2.0549e-15, ..., 4.3746e-16, 2.5883e-15,\n 4.0608e-15]], device='cuda:0')" }, "35": { - "step": "tensor(15024.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.4676e-17, 7.6351e-17, 3.9828e-20, ..., 5.7821e-18, 6.6614e-18,\n 2.8502e-18],\n [3.3143e-18, 1.8893e-18, 1.9339e-19, ..., 8.1606e-20, 9.5223e-19,\n 4.3955e-19],\n [2.2946e-15, 2.6355e-15, 3.2504e-18, ..., 1.8619e-16, 3.1752e-16,\n 8.2547e-17],\n ...,\n [7.6805e-15, 7.9047e-15, 8.6512e-18, ..., 5.2038e-16, 9.3085e-16,\n 2.5598e-16],\n [2.1278e-19, 7.3422e-21, 8.0452e-20, ..., 4.0370e-20, 4.6883e-20,\n 1.3685e-20],\n [4.3037e-15, 4.9013e-15, 3.1293e-18, ..., 3.6676e-16, 5.8842e-16,\n 1.5315e-16]], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.9653e-17, 6.3162e-19, 5.6386e-18, 3.7106e-19, 2.4845e-18, 3.6731e-18,\n 5.4238e-19, 4.6920e-19, 2.2857e-20, 3.8294e-18, 3.1060e-18, 5.3720e-18,\n 1.3343e-19, 1.1295e-19, 1.3743e-18, 9.0137e-19, 2.9734e-19, 1.1768e-17,\n 4.0161e-18, 5.1439e-20, 2.0251e-19, 1.6433e-18, 3.7944e-18, 1.4637e-19,\n 7.0890e-18, 8.3138e-19, 1.1772e-18, 1.2003e-19, 2.7358e-19, 5.5962e-19,\n 5.5855e-19, 9.2899e-18, 1.3463e-18, 7.2065e-20, 2.7185e-21, 2.7774e-18,\n 9.8835e-19, 3.9818e-19, 1.1906e-20, 5.3087e-22, 1.6130e-18, 2.0185e-19,\n 9.8561e-19, 4.6537e-19, 7.9611e-20, 9.0631e-19, 1.1476e-18, 1.1643e-18,\n 6.1478e-19, 2.6124e-18, 1.0138e-18, 2.8834e-19, 3.7624e-19, 2.2156e-19,\n 8.3114e-21, 1.1301e-20, 2.5864e-19, 2.0760e-20, 8.3219e-19, 3.5392e-20,\n 2.9391e-18, 9.8023e-19, 2.3132e-21, 1.7833e-18, 3.4487e-18, 1.1746e-18,\n 1.7612e-18, 3.4872e-19, 4.4662e-20, 2.7530e-20, 1.8045e-18, 1.9061e-19,\n 1.5167e-20, 7.4518e-19, 4.3419e-19, 1.1704e-18, 8.6083e-19, 2.2293e-19,\n 3.6921e-24, 8.0845e-19, 6.1569e-19, 5.4192e-18, 8.2926e-20, 7.6614e-19,\n 1.0422e-19, 5.2587e-19, 3.0654e-19, 8.4825e-22, 5.2160e-19, 6.7698e-20,\n 2.4214e-21, 3.9286e-19, 3.5130e-19, 1.0012e-17, 3.2986e-18, 2.2939e-18,\n 2.4991e-19, 1.1793e-17, 8.1458e-19, 1.5106e-18, 1.4040e-19, 1.6151e-19,\n 7.9548e-20, 8.6395e-19, 1.0502e-20, 3.0129e-18, 4.9476e-18, 5.4993e-19,\n 6.2445e-19, 9.3877e-20, 9.4963e-19, 1.0784e-17, 8.1575e-20, 1.2354e-17,\n 1.7137e-18, 7.6342e-18, 7.4916e-25, 7.0028e-18, 5.1012e-18, 2.5246e-18,\n 9.6618e-18, 4.3225e-20, 5.0442e-18, 1.9020e-20, 2.7529e-18, 4.5853e-18,\n 4.1555e-18, 3.0099e-18, 5.5559e-18, 8.4077e-18, 9.6691e-19, 1.8858e-18,\n 5.6794e-20, 3.2407e-18, 9.3105e-19, 8.5159e-19, 1.9690e-18, 2.6661e-18,\n 2.2401e-21, 5.2016e-19, 6.5397e-18, 2.4067e-20, 1.3755e-18, 5.0750e-19,\n 3.1769e-18, 1.3215e-17, 2.6277e-18, 3.3749e-20, 8.3346e-18, 1.2639e-19,\n 2.6881e-19, 2.1837e-18, 9.0673e-18, 7.4527e-19, 3.2020e-18, 4.8038e-18,\n 3.5374e-20, 8.9206e-21, 3.2699e-19, 1.6076e-18, 1.5337e-18, 4.2535e-19,\n 6.4031e-18, 3.3120e-18, 3.6881e-19, 3.5587e-21, 7.1066e-20, 1.0068e-20,\n 2.0104e-19, 9.0532e-19, 1.8306e-19, 1.1106e-19, 7.6668e-19, 9.2875e-19,\n 8.4538e-20, 2.3719e-20, 8.8873e-19, 2.0519e-19, 1.2743e-19, 1.8162e-20,\n 1.4808e-18, 4.3072e-21, 3.2027e-19, 1.3826e-18, 3.9361e-19, 1.8354e-19,\n 1.3715e-18, 4.6045e-19, 4.6202e-18, 1.5085e-19, 1.2884e-18, 1.0620e-19,\n 1.1670e-18, 1.1588e-17, 1.6355e-18, 3.4852e-18, 1.7827e-18, 5.2109e-18,\n 5.9601e-22, 6.3281e-20, 3.0311e-19, 4.5004e-19, 7.7588e-19, 6.3915e-18,\n 1.6046e-19, 4.8043e-19, 3.5927e-20, 5.4112e-19, 6.2329e-20, 1.4474e-18,\n 2.4769e-22, 9.7330e-19, 6.5760e-20, 7.1688e-19, 6.5961e-18, 3.3438e-18,\n 4.9121e-18, 1.3285e-18, 1.6811e-18, 5.2298e-18, 1.3281e-18, 2.5361e-20,\n 6.7241e-19, 5.9812e-18, 3.8259e-20, 7.8963e-19, 4.7815e-18, 1.7000e-18,\n 1.4997e-18, 1.4509e-18, 1.2557e-18, 3.7388e-20, 5.5150e-19, 2.9133e-19,\n 1.3515e-18, 3.7093e-19, 9.3405e-21, 8.5688e-20, 3.4602e-19, 3.6440e-20,\n 3.3520e-18, 9.2454e-19, 1.4154e-19, 1.1205e-18, 3.4318e-20, 1.2814e-18,\n 3.4418e-18, 4.4162e-19, 1.4633e-18, 4.5615e-19, 3.4799e-19, 3.5788e-18,\n 1.3899e-18, 3.9601e-18, 3.7045e-18, 8.0299e-19, 1.2740e-34, 5.6316e-36,\n 2.2976e-33, 1.6154e-34, 2.5724e-33, 1.0110e-34, 6.6316e-35, 1.4048e-33,\n 9.2061e-34, 1.3400e-33, 4.0453e-35, 5.0106e-35, 1.4052e-33, 1.2629e-33,\n 6.9622e-35, 1.5342e-34, 5.6518e-34, 3.6007e-37, 3.5728e-33, 5.6551e-34,\n 1.8609e-34, 1.2711e-33, 5.6533e-34, 5.8035e-34, 4.2434e-34, 1.3553e-34,\n 3.3299e-34, 3.8109e-35, 2.8795e-33, 5.9183e-34, 5.1410e-34, 4.5789e-34,\n 2.4784e-33, 7.5076e-34, 8.0986e-35, 2.8697e-34, 4.5604e-34, 6.5071e-36,\n 6.2639e-35, 1.9649e-35, 1.3527e-36, 6.0458e-35, 2.9939e-34, 4.2979e-36,\n 4.9102e-34, 1.4436e-34, 7.6282e-34, 3.9414e-36, 6.2390e-34, 5.3745e-36,\n 1.6447e-34, 1.8787e-38, 2.3097e-34, 1.0995e-33, 3.1904e-34, 6.1233e-35,\n 2.3887e-33, 3.0730e-34, 4.4387e-33, 3.5716e-34, 1.0261e-35, 5.7075e-35,\n 5.1814e-34, 3.0720e-34, 7.9150e-35, 4.1250e-34, 1.5097e-33, 1.9782e-33,\n 1.0535e-33, 4.3292e-35, 3.6859e-33, 2.6868e-33, 2.8236e-33, 4.7127e-33,\n 9.7499e-33, 3.0658e-34, 6.0759e-33, 1.2922e-34, 4.4750e-34, 8.9240e-35,\n 1.3151e-33, 5.2093e-33, 3.1125e-33, 2.0280e-33, 5.3958e-34, 9.7591e-34,\n 4.7493e-33, 1.0681e-32, 5.9818e-34, 1.6384e-33, 9.4309e-34, 4.9385e-33,\n 5.1817e-34, 3.0279e-35, 3.4361e-34, 3.2805e-34, 3.0508e-33, 1.0372e-35,\n 6.2202e-34, 4.4981e-36, 3.5366e-34, 1.6699e-33, 2.4551e-33, 2.1041e-34,\n 5.5505e-35, 2.4246e-34, 4.1953e-34, 2.4060e-35, 1.4778e-35, 6.9081e-35,\n 7.9857e-35, 1.3655e-35, 3.1606e-34, 3.9505e-34, 5.5881e-34, 5.0682e-34,\n 3.6044e-34, 4.7597e-36, 1.6077e-37, 9.2621e-36, 6.5600e-36, 6.8146e-34,\n 9.4484e-34, 6.9156e-34, 3.2653e-34, 8.8595e-34, 4.3912e-36, 1.4847e-33,\n 1.5807e-35, 1.8156e-34, 7.3954e-35, 2.3566e-33, 9.4194e-34, 1.4728e-33,\n 1.2253e-33, 2.5952e-35, 2.3084e-35, 2.7249e-34, 5.4554e-34, 1.6885e-33,\n 1.7848e-34, 8.4675e-35, 4.8585e-35, 7.3591e-36, 2.7700e-33, 2.6054e-34,\n 7.8303e-34, 5.8998e-35, 9.2767e-35, 6.0003e-33, 1.0200e-33, 1.1598e-35,\n 8.2781e-34, 9.8265e-34, 2.8521e-33, 1.6954e-33, 2.1716e-33, 1.0389e-33,\n 1.5678e-35, 1.2225e-32, 2.7699e-33, 3.3970e-35, 3.3504e-33, 8.5116e-34,\n 7.4078e-34, 4.3623e-35, 5.0292e-34, 2.3973e-34, 2.4972e-34, 9.8148e-34,\n 7.8497e-34, 9.3411e-38, 7.2831e-34, 7.3615e-35, 1.1334e-35, 1.7322e-34,\n 1.7737e-33, 1.2551e-33, 6.4896e-36, 9.8860e-34, 2.8698e-34, 2.0612e-34,\n 3.4540e-33, 3.4595e-33, 3.6617e-34, 1.8818e-35, 3.5350e-33, 3.2753e-33,\n 1.5273e-33, 1.9425e-34, 1.9822e-34, 7.8723e-34, 9.2318e-34, 1.0339e-33,\n 5.0876e-33, 3.5811e-34, 1.1218e-34, 1.7711e-33, 3.4187e-34, 1.7086e-33,\n 2.6965e-34, 2.9229e-34, 2.7720e-34, 1.3204e-33, 2.5763e-33, 1.8579e-33,\n 1.2376e-35, 4.6927e-35, 6.0303e-34, 2.4915e-37, 1.8288e-34, 1.1475e-33,\n 1.2247e-34, 3.5537e-34, 5.2432e-34, 1.0465e-33, 3.2696e-34, 5.2023e-34,\n 1.0767e-33, 5.8924e-34, 5.0881e-35, 1.1797e-34, 3.7582e-35, 6.8300e-35,\n 4.6421e-34, 3.0784e-34, 8.6830e-34, 2.9821e-33, 1.8803e-33, 1.2903e-33,\n 9.2349e-34, 1.1029e-33, 3.9835e-35, 2.2848e-33, 6.9145e-35, 1.0420e-34,\n 4.9152e-34, 2.8501e-34, 5.1930e-34, 1.0490e-34, 8.0482e-35, 1.6155e-38,\n 4.8751e-35, 4.0341e-35, 7.5042e-34, 1.3341e-33, 1.0378e-34, 8.9664e-34,\n 3.9450e-34, 1.1791e-33, 9.7426e-36, 3.3109e-34, 9.4364e-35, 1.3969e-33,\n 2.1663e-34, 1.2491e-34, 2.5923e-15, 4.0580e-14, 7.1431e-15, 6.8120e-15,\n 5.3898e-14, 4.7890e-14, 6.5326e-16, 3.4819e-16, 8.7292e-16, 8.7369e-15,\n 1.1572e-17, 2.3292e-15, 4.2282e-14, 1.4222e-15, 2.3402e-15, 1.0715e-14,\n 1.5731e-14, 3.2313e-15, 2.8803e-14, 1.0867e-14, 1.2053e-17, 3.3849e-17,\n 5.8456e-15, 3.8511e-14, 3.2444e-14, 1.1303e-16, 1.4614e-14, 7.8598e-16,\n 4.1119e-15, 8.3421e-16, 1.8185e-14, 1.5185e-14, 5.8856e-14, 6.3177e-16,\n 1.2041e-14, 1.7285e-14, 1.0197e-14, 1.0320e-14, 9.1018e-15, 3.5974e-16,\n 5.5740e-14, 1.8253e-16, 9.0170e-16, 1.5876e-15, 2.6993e-14, 2.3083e-15,\n 5.0480e-15, 2.1065e-15, 4.0517e-16, 3.8580e-15, 7.7335e-15, 1.3814e-15,\n 2.2557e-14, 2.1289e-14, 2.2944e-15, 1.6078e-15, 2.4879e-15, 1.8032e-16,\n 1.4568e-14, 5.5262e-15, 1.1779e-14, 1.1658e-14, 1.2274e-15, 3.1784e-16,\n 6.5621e-19, 3.1992e-17, 9.8816e-17, 2.9552e-16, 4.2620e-15, 3.0373e-16,\n 3.7953e-15, 9.8975e-15, 1.4244e-14, 2.2705e-15, 5.1454e-15, 6.4968e-17,\n 2.9017e-15, 8.5925e-14, 4.5085e-16, 4.3317e-15, 5.8155e-15, 3.9772e-14,\n 1.4597e-17, 1.1281e-14, 3.2504e-15, 1.5699e-14, 1.0023e-14, 1.1639e-14,\n 9.5099e-19, 3.7123e-14, 2.4955e-14, 2.2718e-17, 1.1176e-15, 2.6884e-14,\n 1.2487e-15, 5.2152e-15, 7.3711e-17, 1.8524e-14, 3.5666e-15, 1.7335e-14,\n 9.0353e-15, 3.1332e-14, 3.9461e-16, 7.5760e-15, 1.5533e-14, 5.6603e-14,\n 2.5030e-14, 4.4177e-17, 9.4903e-16, 9.5036e-15, 5.9401e-15, 1.0970e-15,\n 1.6736e-15, 1.9785e-15, 5.8676e-15, 8.7266e-16, 7.3935e-15, 1.4673e-16,\n 2.2709e-14, 2.8702e-16, 2.2251e-14, 2.5490e-14, 7.8540e-15, 2.3145e-16,\n 2.6842e-15, 3.1423e-16, 7.3483e-15, 1.7416e-14, 1.7390e-15, 4.5113e-16,\n 1.8108e-14, 8.9848e-16, 1.0575e-14, 5.9589e-15, 3.0699e-15, 2.0908e-17,\n 4.2659e-16, 4.5183e-14, 5.8226e-16, 3.1347e-14, 5.4126e-18, 1.5516e-14,\n 5.8790e-15, 5.8727e-15, 1.7333e-15, 2.1118e-14, 7.9443e-15, 3.0853e-15,\n 1.2807e-14, 3.9970e-17, 2.9107e-14, 1.8334e-15, 2.2573e-16, 3.0266e-15,\n 5.9841e-15, 2.3470e-14, 4.5229e-15, 1.0751e-14, 1.6675e-16, 2.7040e-14,\n 7.9058e-15, 8.0796e-15, 6.3403e-15, 4.9587e-14, 4.5161e-17, 1.4029e-15,\n 4.3027e-14, 3.3582e-15, 3.9673e-15, 2.0639e-16, 6.8747e-15, 1.7786e-14,\n 1.5455e-14, 2.3504e-14, 5.7032e-16, 6.9953e-15, 4.0822e-15, 3.6530e-15,\n 3.7334e-15, 2.3518e-15, 5.9510e-15, 1.4929e-14, 1.1466e-15, 2.5459e-14,\n 2.8431e-14, 6.9084e-15, 6.7850e-15, 6.4578e-15, 4.6707e-17, 1.3819e-15,\n 1.4074e-15, 3.0352e-15, 1.3731e-14, 1.7391e-15, 7.5672e-16, 1.0467e-14,\n 2.8885e-18, 2.5447e-14, 2.1518e-16, 2.7881e-14, 1.1862e-14, 3.3343e-14,\n 8.6068e-16, 5.7033e-15, 2.2396e-14, 8.4272e-15, 8.1818e-16, 2.1616e-14,\n 1.6469e-15, 1.0478e-14, 5.3225e-17, 1.6143e-15, 4.7152e-14, 1.0287e-14,\n 4.7324e-14, 5.3247e-15, 7.6026e-15, 1.8831e-17, 2.6296e-15, 6.6313e-15,\n 6.0978e-17, 4.4638e-16, 5.7284e-15, 1.8342e-15, 2.6353e-15, 1.5011e-15,\n 1.7008e-14, 1.7036e-15, 4.3022e-16, 1.4201e-16, 4.7024e-17, 8.1897e-17,\n 1.0262e-15, 1.4065e-14, 9.8126e-16, 2.0064e-15, 2.5076e-16, 2.3946e-14,\n 1.1742e-14, 2.2769e-14, 5.1809e-14, 1.0730e-15, 2.8042e-14, 1.1444e-14,\n 2.5414e-15, 9.5184e-14, 2.0541e-15, 9.7952e-15, 3.2842e-15, 2.3567e-15,\n 1.2260e-15, 1.0531e-14, 5.6732e-15, 2.7401e-16, 1.4191e-15, 2.4386e-14],\n device='cuda:0')" }, "36": { - "step": "tensor(15024.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.7404e-14, 1.2253e-16, 1.3467e-12, 6.0767e-12, 1.4845e-13, 3.1678e-15,\n 8.8836e-13, 5.2250e-13, 3.7186e-13, 8.5798e-16, 5.0437e-14, 2.7530e-16,\n 2.8599e-13, 1.7065e-14, 8.8634e-15, 1.1396e-13, 1.5302e-14, 1.4381e-13,\n 5.4671e-15, 1.2298e-13, 3.1275e-14, 7.0254e-16, 9.3110e-13, 1.8855e-12,\n 1.4946e-13, 2.2546e-14, 2.2777e-13, 2.0156e-13, 8.4300e-15, 4.8237e-14,\n 7.3027e-14, 1.1265e-13, 2.2612e-13, 1.8168e-13, 2.0170e-15, 1.5415e-13,\n 5.3489e-13, 7.3244e-14, 4.0258e-13, 3.0692e-15, 8.2029e-13, 4.7747e-15,\n 5.1932e-13, 6.2591e-13, 1.2792e-14, 1.6124e-15, 5.1530e-14, 2.0651e-14,\n 6.3863e-15, 7.9622e-13, 1.6352e-13, 7.7756e-13, 2.4650e-16, 9.8889e-14,\n 3.0413e-14, 9.0796e-15, 2.1616e-12, 3.2293e-14, 1.9506e-13, 3.3329e-16,\n 1.0200e-15, 2.3291e-15, 2.9649e-13, 1.4885e-13, 1.1572e-13, 7.0115e-13,\n 1.4192e-13, 5.1344e-15, 1.6752e-13, 6.7110e-13, 1.3641e-13, 8.2236e-14,\n 5.4982e-14, 8.2386e-16, 1.4777e-16, 7.0829e-14, 1.2559e-13, 1.4610e-14,\n 1.2343e-12, 1.2500e-16, 1.7227e-15, 1.3585e-13, 1.8547e-12, 3.9331e-16,\n 1.5028e-13, 4.5179e-15, 1.5999e-13, 1.9043e-14, 2.3957e-13, 1.6708e-12,\n 2.4381e-13, 5.9379e-14, 1.0026e-14, 1.6869e-13, 1.1154e-12, 5.8851e-12,\n 2.2815e-15, 3.0950e-15, 3.6912e-14, 1.0699e-13, 2.0792e-13, 4.0035e-14,\n 5.3824e-14, 1.8664e-13, 1.1423e-12, 2.7314e-15, 2.3745e-16, 6.7682e-14,\n 8.2891e-14, 6.4110e-13, 6.3984e-13, 8.4808e-16, 5.2204e-16, 6.7125e-15,\n 9.1233e-15, 2.0987e-13, 2.0245e-13, 5.4583e-14, 2.3863e-13, 1.0830e-14,\n 1.8586e-12, 6.5979e-13, 8.0654e-16, 6.0372e-14, 1.8192e-13, 5.2804e-15,\n 1.8442e-15, 4.5419e-14, 1.5578e-15, 1.0715e-13, 1.5039e-14, 6.3261e-14,\n 5.4559e-13, 8.3248e-14, 5.7526e-14, 5.1675e-14, 2.2690e-13, 2.3962e-14,\n 4.5302e-14, 5.4901e-16, 8.4240e-15, 1.2159e-12, 3.5045e-13, 3.8037e-14,\n 1.2326e-12, 6.6674e-16, 2.2344e-16, 2.7732e-13, 3.8410e-14, 1.2391e-14,\n 7.6749e-15, 8.4801e-14, 4.5642e-14, 1.8924e-15, 8.2019e-15, 1.2280e-14,\n 7.7123e-15, 2.0795e-12, 6.1761e-13, 2.1137e-12, 4.7188e-14, 6.4969e-14,\n 2.3755e-12, 6.8147e-17, 4.5123e-14, 6.6413e-12, 7.6254e-13, 1.4191e-13,\n 1.7904e-12, 2.4707e-15, 4.3924e-14, 3.8957e-13, 7.9061e-14, 1.6954e-17,\n 5.2453e-14, 2.0370e-12, 1.1739e-14, 2.5556e-15, 3.0311e-12, 8.9727e-13,\n 1.0699e-15, 9.3506e-15, 1.1514e-15, 5.5140e-14, 2.1965e-13, 1.2426e-16,\n 7.7690e-15, 1.7670e-12, 9.3385e-15, 2.5570e-16, 1.9673e-15, 3.5408e-13,\n 4.1491e-13, 3.4584e-13, 4.5305e-13, 5.0840e-14, 5.6849e-16, 1.4358e-12,\n 1.8914e-13, 7.7792e-14, 1.0736e-13, 4.9515e-14, 4.2468e-14, 8.6472e-15,\n 5.3897e-15, 7.7378e-15, 4.8491e-15, 2.9170e-14, 1.8467e-15, 4.3765e-13,\n 1.5505e-13, 1.2284e-14, 1.4080e-14, 6.3412e-14, 3.8918e-12, 5.1535e-15,\n 2.3381e-14, 2.3904e-15, 1.1215e-12, 1.6660e-12, 1.8800e-16, 8.6931e-14,\n 6.4419e-14, 2.2592e-13, 3.8758e-16, 5.4168e-15, 7.4620e-13, 8.4833e-13,\n 2.3700e-16, 5.2118e-16, 1.2253e-16, 1.3038e-14, 1.2275e-13, 3.6623e-15,\n 2.8260e-14, 3.2702e-13, 5.5565e-14, 7.8568e-17, 4.5280e-13, 5.6570e-15,\n 7.2005e-13, 5.5642e-13, 1.5277e-12, 1.1369e-12, 2.7110e-14, 2.3628e-15,\n 3.6677e-15, 3.9851e-13, 9.0774e-13, 1.1625e-14, 1.1535e-12, 2.9361e-15,\n 1.9169e-13, 3.9360e-12, 4.4135e-17, 2.5207e-12], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.6831e-16, 4.8487e-19, 4.8488e-15, ..., 1.2557e-17, 1.2641e-16,\n 3.4943e-16],\n [1.1770e-15, 1.2266e-19, 6.8774e-15, ..., 1.9684e-17, 1.8679e-16,\n 4.9332e-16],\n [6.6461e-16, 8.4539e-20, 3.8871e-15, ..., 1.0270e-17, 1.1063e-16,\n 2.5135e-16],\n ...,\n [5.0290e-16, 7.1781e-20, 3.0500e-15, ..., 4.7173e-18, 7.6094e-17,\n 2.0231e-16],\n [6.6068e-17, 1.7670e-19, 3.6393e-16, ..., 1.2658e-18, 7.1535e-18,\n 2.7427e-17],\n [3.4957e-16, 4.2123e-19, 2.0513e-15, ..., 5.9699e-18, 4.7772e-17,\n 1.4038e-16]], device='cuda:0')" }, "37": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.0567e-16, 1.6933e-17, 3.1715e-15, 1.1284e-14, 2.6763e-17, 3.3289e-17,\n 4.1176e-16, 1.7337e-16, 5.3248e-17, 1.8008e-19, 1.3703e-16, 6.2492e-18,\n 5.2234e-16, 3.0283e-17, 7.0548e-17, 2.5016e-18, 1.0375e-16, 5.8772e-17,\n 4.1467e-18, 9.4714e-17, 1.2894e-17, 5.0716e-18, 3.1831e-16, 9.3260e-15,\n 3.6691e-16, 6.4477e-18, 1.8808e-16, 7.5560e-18, 2.3122e-17, 5.6110e-18,\n 6.6684e-17, 2.2788e-16, 2.5188e-17, 2.4164e-16, 4.9287e-19, 1.0754e-15,\n 1.3632e-15, 3.1812e-17, 6.8073e-16, 1.1813e-17, 2.3924e-15, 1.8157e-18,\n 4.2657e-16, 2.1762e-15, 6.8650e-18, 2.8221e-17, 3.8919e-18, 6.3968e-19,\n 1.9026e-18, 6.2263e-16, 1.5936e-16, 2.1684e-16, 9.3671e-19, 7.3912e-18,\n 3.8299e-18, 1.4094e-18, 5.6272e-16, 1.3621e-16, 1.6487e-17, 4.9295e-18,\n 3.0567e-17, 2.7094e-18, 1.7504e-16, 4.4005e-18, 1.9848e-17, 1.8838e-16,\n 1.6802e-16, 9.4010e-19, 9.9352e-18, 1.4853e-15, 5.9986e-17, 3.9246e-18,\n 1.5024e-17, 4.3620e-18, 6.8659e-19, 4.5345e-18, 3.3146e-17, 4.0518e-18,\n 1.5086e-15, 1.2371e-19, 1.6542e-18, 1.1731e-17, 1.9458e-15, 1.1111e-17,\n 2.5304e-16, 5.3889e-19, 5.2534e-17, 3.5907e-18, 9.3423e-17, 9.8808e-16,\n 2.2188e-16, 1.0455e-17, 8.5026e-18, 7.4690e-18, 1.7727e-15, 2.4871e-14,\n 1.2808e-17, 7.1817e-18, 1.3269e-17, 2.8054e-16, 3.0155e-17, 2.6759e-18,\n 1.4700e-16, 6.0237e-17, 8.7679e-16, 3.0870e-19, 6.8513e-19, 2.3701e-16,\n 1.6918e-17, 8.2819e-16, 2.5098e-15, 8.1547e-19, 2.0514e-19, 6.7305e-19,\n 1.2311e-17, 7.8744e-17, 1.5634e-16, 7.0920e-18, 8.4938e-17, 5.1185e-17,\n 1.3796e-15, 1.4304e-15, 1.9505e-19, 6.8967e-17, 1.3442e-15, 5.2938e-19,\n 4.7262e-18, 5.8375e-18, 8.7739e-19, 2.2951e-17, 1.2505e-18, 2.9313e-17,\n 3.0372e-16, 1.6490e-17, 6.7495e-18, 2.5766e-18, 2.4225e-16, 1.2948e-18,\n 8.5985e-17, 1.8687e-17, 9.9874e-19, 7.8466e-16, 8.4111e-17, 2.2649e-16,\n 3.4645e-15, 8.9660e-19, 9.4929e-19, 2.6218e-16, 7.8288e-17, 2.2485e-18,\n 3.0578e-18, 8.4444e-17, 1.9285e-18, 9.0410e-19, 4.4558e-19, 4.1966e-16,\n 1.2803e-18, 7.7931e-15, 2.7825e-16, 6.3114e-16, 6.0599e-18, 6.1200e-17,\n 1.0017e-14, 4.0236e-18, 3.1031e-18, 1.2299e-14, 1.8771e-15, 3.2520e-16,\n 1.9824e-15, 1.4231e-17, 5.3314e-20, 1.8030e-16, 5.9494e-17, 1.3407e-17,\n 1.5858e-18, 2.1676e-15, 9.7329e-18, 6.1063e-18, 3.4162e-15, 1.7952e-15,\n 1.6300e-17, 2.0350e-17, 5.2651e-17, 4.3184e-17, 4.3274e-17, 1.1318e-18,\n 1.8603e-18, 2.6741e-15, 2.8726e-18, 1.4513e-18, 1.3940e-17, 5.3336e-17,\n 1.3816e-15, 5.8701e-16, 1.6835e-16, 2.2069e-16, 4.0111e-20, 7.7456e-15,\n 2.1779e-16, 2.5970e-18, 1.1058e-17, 1.6053e-18, 4.8848e-18, 1.0073e-16,\n 7.3320e-19, 1.4103e-18, 6.2406e-17, 1.7917e-19, 2.0280e-17, 1.6313e-16,\n 1.0704e-16, 1.0373e-18, 5.4384e-19, 1.8303e-17, 5.2227e-15, 1.6341e-19,\n 4.1597e-19, 1.7446e-18, 1.1835e-16, 3.8012e-15, 2.3380e-18, 2.5798e-17,\n 4.8930e-17, 4.2639e-16, 8.4453e-18, 8.8259e-18, 4.7657e-17, 3.2374e-16,\n 5.1248e-18, 2.9572e-18, 1.9689e-17, 3.0209e-18, 2.6314e-16, 2.4205e-18,\n 9.1819e-19, 5.2766e-16, 5.9112e-17, 3.5644e-19, 1.5909e-15, 1.3628e-18,\n 1.2655e-16, 7.5293e-16, 1.8326e-15, 3.4842e-15, 1.0101e-17, 1.1684e-17,\n 2.1920e-17, 2.7008e-16, 3.1066e-15, 7.1117e-18, 2.7518e-15, 2.1190e-19,\n 1.0408e-16, 2.8630e-15, 1.3892e-18, 1.6694e-14], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.1413e-14, 4.3701e-14, 2.3371e-14, 1.2408e-13, 3.7657e-16, 3.5989e-15,\n 6.5907e-14, 1.2375e-13, 1.0010e-13, 4.9845e-14, 2.1605e-15, 1.0408e-13,\n 5.1417e-15, 2.5641e-16, 1.3534e-14, 1.2931e-16, 5.6531e-14, 1.0705e-13,\n 2.9939e-14, 6.3961e-15, 1.1657e-14, 8.5796e-15, 4.0842e-14, 5.2355e-16,\n 1.3609e-15, 1.1548e-14, 1.1907e-15, 1.7409e-14, 8.2501e-15, 2.5901e-14,\n 5.6637e-17, 1.3899e-14, 1.4064e-14, 4.8982e-15, 6.3061e-16, 2.7574e-14,\n 9.4416e-16, 1.5855e-13, 2.6588e-14, 1.8922e-15, 5.9730e-14, 3.8909e-15,\n 1.1433e-14, 1.0338e-15, 2.0862e-14, 3.5779e-14, 4.4926e-16, 4.6738e-17,\n 2.9350e-14, 2.6666e-14, 3.8094e-14, 1.5876e-15, 1.6556e-14, 6.2054e-14,\n 7.9273e-16, 6.9338e-15, 6.2723e-14, 2.4884e-16, 2.8784e-15, 5.1676e-15,\n 2.1692e-14, 1.6979e-14, 3.4929e-15, 3.3188e-14, 2.0864e-16, 5.2313e-14,\n 7.1252e-15, 1.3622e-14, 1.5339e-14, 5.6167e-15, 2.6387e-15, 5.1696e-14,\n 6.3340e-14, 1.3270e-13, 8.5635e-16, 1.0361e-16, 5.2256e-15, 4.4617e-14,\n 6.7608e-16, 1.2831e-16, 1.0488e-14, 6.6503e-16, 2.6591e-14, 4.5677e-15,\n 5.0262e-15, 2.5981e-15, 5.0766e-15, 7.2989e-15, 7.5243e-15, 8.7782e-14,\n 7.6500e-17, 5.2189e-14, 2.1642e-14, 5.8149e-14, 1.4171e-13, 9.0325e-14,\n 9.1724e-15, 1.2366e-13, 4.4453e-15, 1.5629e-15, 5.5043e-14, 1.8922e-17,\n 2.3995e-14, 1.4496e-14, 1.3666e-15, 3.8353e-14, 2.3051e-17, 2.2587e-13,\n 4.6795e-14, 4.8546e-14, 1.7156e-16, 1.4042e-15, 4.3486e-15, 4.3381e-14,\n 1.3675e-14, 3.6454e-14, 7.7088e-17, 4.5057e-14, 5.6658e-14, 2.2714e-14,\n 7.5750e-14, 2.9925e-14, 2.3390e-14, 3.3520e-15, 1.5847e-13, 8.3103e-14,\n 2.1122e-14, 8.7290e-15, 1.8096e-14, 6.4410e-17, 1.8845e-13, 1.8314e-14,\n 2.3791e-16, 7.0258e-17, 1.2240e-13, 4.7216e-14, 7.1095e-15, 3.1241e-15,\n 2.8890e-16, 9.4594e-17, 7.8034e-14, 1.6231e-16, 1.1709e-13, 2.2783e-15,\n 2.6433e-15, 5.6034e-15, 4.6625e-14, 3.9108e-14, 1.1358e-14, 1.3677e-16,\n 2.2519e-15, 7.1856e-16, 8.5269e-17, 7.7095e-14, 6.8216e-15, 1.0114e-13,\n 2.4267e-15, 5.6697e-14, 7.6711e-15, 7.6834e-14, 2.0085e-14, 3.2693e-15,\n 3.4492e-14, 3.0007e-14, 4.1632e-14, 9.1753e-14, 1.9278e-13, 9.1081e-17,\n 3.6171e-16, 1.7542e-14, 1.0144e-14, 1.5444e-16, 6.2206e-14, 8.0069e-15,\n 8.9934e-14, 1.1215e-16, 4.2028e-14, 4.6898e-15, 2.0234e-14, 2.0307e-14,\n 2.1650e-13, 2.1556e-14, 1.3920e-14, 2.0694e-17, 4.0557e-15, 6.2865e-14,\n 2.5678e-16, 2.2967e-14, 5.8512e-14, 5.2134e-15, 5.9782e-14, 5.1983e-15,\n 1.5070e-14, 1.3794e-14, 9.1150e-14, 4.8776e-14, 5.7390e-14, 4.2129e-16,\n 1.8283e-14, 1.1709e-13, 1.2519e-16, 1.0861e-14, 5.5984e-15, 8.2095e-14,\n 8.0206e-14, 1.1472e-14, 3.7946e-14, 7.8278e-15, 5.6070e-16, 2.1388e-16,\n 2.5401e-16, 1.2774e-14, 6.5816e-14, 5.8580e-14, 2.1139e-14, 7.3536e-14,\n 8.9318e-15, 4.4182e-14, 3.2186e-16, 1.1593e-15, 2.2138e-17, 6.2401e-15,\n 6.9216e-14, 5.1582e-15, 1.7396e-15, 3.5625e-14, 1.0029e-14, 3.1315e-14,\n 2.2269e-15, 4.4172e-14, 6.1819e-16, 2.4171e-14, 7.5132e-14, 3.1691e-16,\n 6.2561e-14, 1.4038e-15, 1.4855e-14, 5.5760e-17, 1.1795e-13, 2.3774e-15,\n 1.0812e-14, 2.4408e-15, 2.8556e-15, 1.6726e-14, 2.4615e-14, 1.7058e-14,\n 6.1662e-14, 2.3448e-14, 9.9821e-14, 9.5915e-15, 5.1917e-14, 1.1255e-18,\n 4.0920e-15, 1.9280e-14, 2.4922e-15, 1.3266e-14], device='cuda:0')" }, "38": { - "step": "tensor(15024.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.8724e-17, 5.2119e-20, 2.1313e-15, 7.6025e-15, 2.3417e-16, 7.6431e-18,\n 1.2798e-15, 8.2241e-16, 5.5656e-16, 6.2158e-19, 3.8120e-17, 2.3230e-18,\n 5.8748e-16, 1.1334e-16, 7.8489e-17, 1.0974e-16, 9.0146e-17, 2.3789e-16,\n 4.8198e-18, 3.0413e-16, 1.0325e-16, 2.8979e-18, 1.1602e-15, 2.8518e-15,\n 4.5714e-16, 2.7496e-17, 4.0229e-16, 2.0656e-16, 8.7764e-18, 8.2290e-17,\n 1.1302e-16, 2.9314e-16, 3.4139e-16, 5.4499e-16, 4.5864e-18, 5.1401e-16,\n 9.4148e-16, 2.9679e-17, 6.7218e-16, 1.7554e-17, 1.1517e-15, 9.8753e-18,\n 9.6601e-16, 8.7389e-16, 6.5669e-17, 4.7124e-17, 3.6330e-17, 2.2265e-18,\n 6.3592e-19, 1.0363e-15, 2.5492e-16, 1.1285e-15, 8.5407e-19, 7.3746e-17,\n 1.5834e-17, 6.4053e-18, 3.0127e-15, 5.2858e-17, 2.3914e-16, 4.3606e-18,\n 9.8606e-21, 2.1763e-17, 2.9093e-16, 1.9608e-16, 1.7823e-16, 1.0340e-15,\n 1.9210e-16, 1.0424e-17, 2.4869e-16, 1.0612e-15, 2.1401e-16, 7.0365e-17,\n 1.3832e-16, 1.4278e-18, 4.3718e-19, 5.6073e-17, 2.0472e-16, 3.5944e-18,\n 1.6974e-15, 5.1798e-19, 3.7596e-18, 1.6992e-16, 2.6592e-15, 2.6831e-18,\n 1.8382e-16, 1.2363e-17, 1.4478e-16, 3.4894e-17, 3.9916e-16, 2.1545e-15,\n 4.1321e-16, 1.0590e-16, 3.0832e-18, 2.5261e-16, 1.6478e-15, 7.6493e-15,\n 6.1429e-18, 1.9367e-17, 1.0814e-16, 3.0513e-16, 3.1701e-16, 6.5509e-17,\n 2.5259e-16, 2.0793e-16, 1.3713e-15, 2.2168e-17, 8.3031e-19, 2.6170e-16,\n 1.4473e-16, 9.6056e-16, 1.0688e-15, 1.5839e-17, 5.0140e-18, 1.0651e-17,\n 4.0457e-18, 3.3694e-16, 3.3428e-16, 9.4150e-17, 3.6333e-16, 1.8067e-17,\n 2.1875e-15, 7.9080e-16, 1.8626e-18, 1.2726e-16, 5.9262e-16, 1.1736e-17,\n 4.5179e-20, 3.7082e-18, 2.6850e-21, 7.0343e-17, 2.6998e-17, 1.0288e-16,\n 5.9925e-16, 1.4076e-16, 9.2919e-17, 8.0691e-17, 4.4089e-16, 1.0119e-17,\n 8.9050e-17, 1.0358e-18, 1.4154e-17, 1.4073e-15, 5.7538e-16, 5.3515e-17,\n 1.2529e-15, 1.5942e-17, 1.4495e-19, 4.4215e-16, 9.9316e-17, 7.3865e-18,\n 2.8650e-17, 2.6168e-16, 3.9707e-17, 3.1254e-18, 1.3898e-17, 1.2974e-16,\n 3.7841e-19, 3.0352e-15, 9.0744e-16, 2.6746e-15, 1.5616e-17, 1.1677e-16,\n 3.5636e-15, 4.5911e-20, 7.4998e-17, 9.2680e-15, 1.2147e-15, 2.9211e-16,\n 2.2659e-15, 2.0851e-17, 3.4940e-17, 3.9699e-16, 2.5655e-16, 2.9822e-17,\n 8.5193e-17, 2.5990e-15, 3.6161e-18, 1.2823e-18, 3.6655e-15, 1.3542e-15,\n 1.4003e-18, 6.9404e-18, 1.6808e-18, 7.9506e-17, 3.3395e-16, 5.5723e-19,\n 1.2802e-17, 2.0621e-15, 4.6749e-17, 5.6602e-19, 6.9119e-19, 4.0009e-16,\n 7.8620e-16, 5.2658e-16, 4.8085e-16, 7.9576e-17, 2.0846e-19, 2.2336e-15,\n 3.5262e-16, 6.6161e-17, 8.0213e-17, 8.0261e-17, 3.3348e-17, 1.1205e-16,\n 2.1237e-19, 1.3484e-17, 3.2174e-17, 4.9203e-17, 3.6621e-18, 7.2260e-16,\n 1.5079e-16, 1.6505e-17, 2.5977e-17, 1.4893e-16, 5.4541e-15, 1.3337e-18,\n 3.8002e-17, 4.2656e-18, 1.5766e-15, 2.5149e-15, 1.5257e-19, 1.5748e-16,\n 4.0330e-17, 3.5482e-16, 2.3669e-18, 3.1997e-18, 1.0401e-15, 1.1674e-15,\n 5.9306e-19, 1.2484e-18, 1.7379e-19, 2.2409e-17, 1.9707e-16, 3.8593e-17,\n 1.0877e-17, 6.4220e-16, 1.7665e-16, 1.3785e-20, 8.7327e-16, 5.2567e-19,\n 1.0326e-15, 8.9268e-16, 1.8459e-15, 1.5925e-15, 5.5806e-17, 6.7123e-18,\n 8.6700e-18, 6.1103e-16, 1.5297e-15, 3.0423e-17, 1.8772e-15, 2.6737e-18,\n 2.1775e-16, 5.4708e-15, 2.7923e-19, 4.0522e-15], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.1978e-13, 5.8178e-14, 8.3276e-14, ..., 1.0607e-14, 6.4743e-14,\n 1.1955e-13],\n [2.2694e-14, 5.8987e-15, 8.8311e-15, ..., 1.1760e-15, 6.5368e-15,\n 1.1852e-14],\n [2.4722e-14, 6.3736e-15, 9.2015e-15, ..., 1.1324e-15, 7.3380e-15,\n 1.3708e-14],\n [2.5901e-14, 7.1510e-15, 9.7372e-15, ..., 1.2282e-15, 7.7324e-15,\n 1.4356e-14]], device='cuda:0')" }, "39": { - "step": "tensor(15024.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.5317e-18, 5.4138e-17, 1.0972e-17, ..., 4.2836e-20, 5.2296e-18,\n 1.6872e-16],\n [2.1395e-17, 3.0482e-18, 8.6978e-19, ..., 2.1201e-17, 2.8831e-20,\n 2.4180e-16],\n [3.4632e-19, 8.7395e-17, 2.2055e-18, ..., 1.4663e-18, 5.4572e-18,\n 1.3583e-16],\n ...,\n [4.1720e-16, 4.4983e-17, 7.2701e-16, ..., 2.9312e-17, 9.2256e-17,\n 7.4115e-15],\n [1.2768e-16, 1.2275e-16, 1.3964e-15, ..., 2.0240e-17, 2.7181e-17,\n 2.2678e-15],\n [3.9928e-16, 9.5726e-15, 1.2082e-13, ..., 2.7755e-15, 8.6156e-17,\n 6.7119e-14]], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.5114e-12, 5.6788e-13, 6.1447e-13, 6.5638e-13], device='cuda:0')" }, "40": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([6.6966e-17, 1.6588e-16, 4.4671e-17, 1.6752e-17, 1.3781e-16, 3.8425e-17,\n 1.5381e-17, 5.0193e-17, 1.3287e-17, 3.6006e-17, 1.2185e-16, 3.0228e-17,\n 1.1192e-18, 1.4169e-17, 4.5144e-18, 2.2947e-18, 1.8502e-16, 6.2532e-17,\n 3.1264e-17, 1.6789e-16, 1.4646e-17, 2.7843e-16, 3.8346e-17, 2.1017e-17,\n 6.6676e-17, 1.1835e-16, 8.2837e-19, 3.2010e-18, 8.4754e-17, 1.4442e-16,\n 1.0432e-16, 1.1460e-16, 6.2988e-17, 1.4266e-16, 2.2534e-16, 6.8169e-16,\n 1.8904e-16, 2.8832e-17, 2.1625e-16, 1.4244e-16, 5.7821e-17, 8.9174e-17,\n 1.3664e-16, 7.2575e-17, 1.4118e-16, 5.8840e-17, 1.0767e-16, 4.5888e-17,\n 3.0375e-18, 3.9559e-16, 5.1806e-17, 8.1647e-17, 6.2290e-18, 3.7791e-16,\n 1.0602e-17, 6.5700e-17, 1.6078e-16, 3.3743e-18, 1.9780e-16, 4.0679e-17,\n 6.6051e-16, 3.9832e-16, 1.3779e-17, 4.6062e-17, 2.9912e-18, 1.0877e-17,\n 4.1366e-17, 2.1462e-17, 5.7891e-17, 2.5023e-16, 1.0182e-16, 5.2163e-17,\n 2.7328e-17, 7.6067e-17, 2.7270e-16, 4.0391e-16, 9.2727e-17, 2.2755e-17,\n 1.5150e-16, 1.5385e-17, 3.0178e-16, 3.2557e-17, 8.0388e-18, 5.5322e-18,\n 6.6048e-18, 2.4507e-17, 5.8360e-16, 7.1349e-17, 2.5478e-18, 1.4578e-16,\n 1.8543e-18, 8.7105e-17, 1.1637e-17, 7.7419e-16, 1.3351e-18, 5.7397e-17,\n 1.2498e-16, 9.2144e-19, 2.7399e-18, 7.4327e-17, 2.1730e-16, 6.1486e-19,\n 3.2488e-16, 1.9822e-17, 3.6854e-17, 3.4888e-17, 4.9045e-17, 2.9991e-16,\n 2.0047e-16, 1.1109e-16, 2.7507e-17, 7.7272e-17, 4.8660e-17, 5.8788e-17,\n 3.2716e-17, 7.4858e-17, 6.2680e-17, 1.1234e-16, 1.8198e-16, 7.5195e-16,\n 5.2975e-18, 7.0349e-17, 1.2824e-16, 3.7250e-18, 2.2362e-18, 2.7917e-18,\n 1.0024e-16, 4.3428e-16, 4.2378e-18, 7.3659e-18, 3.9038e-17, 3.4398e-16,\n 4.1664e-19, 2.2176e-16, 5.2582e-17, 8.9654e-17, 1.9192e-16, 6.2420e-18,\n 5.6900e-17, 1.1429e-18, 6.3837e-17, 1.0066e-17, 1.0879e-17, 1.6133e-17,\n 2.6180e-18, 1.2602e-19, 3.6308e-17, 2.2838e-17, 8.2168e-18, 1.1640e-17,\n 2.1602e-18, 9.6170e-18, 6.9722e-17, 1.3024e-16, 9.4283e-18, 9.2242e-18,\n 2.8024e-17, 1.0006e-17, 4.7368e-17, 7.3820e-18, 2.0084e-16, 1.4330e-18,\n 1.8996e-18, 2.7908e-16, 9.8721e-17, 8.0216e-16, 8.3103e-16, 1.2105e-16,\n 1.6555e-16, 9.3311e-16, 2.6750e-17, 3.2923e-17, 5.2761e-17, 3.2379e-16,\n 2.6206e-17, 2.5282e-16, 5.9345e-18, 1.2190e-16, 1.1111e-16, 1.3844e-15,\n 3.2141e-16, 5.2844e-16, 3.6995e-17, 8.0637e-17, 1.0014e-15, 2.0050e-17,\n 2.5917e-19, 9.3833e-19, 3.3751e-16, 8.4058e-17, 3.2237e-16, 1.5052e-16,\n 8.0847e-17, 1.3268e-17, 5.0942e-16, 5.0019e-16, 2.6114e-16, 7.8490e-17,\n 2.8188e-16, 1.8674e-18, 7.8174e-17, 1.8532e-16, 1.1415e-18, 2.4912e-18,\n 4.4936e-17, 3.7551e-16, 2.6215e-16, 4.0749e-17, 7.6331e-18, 3.5979e-19,\n 4.6100e-16, 2.2952e-16, 6.2034e-18, 4.0257e-18, 1.2860e-16, 4.1360e-17,\n 5.5914e-17, 2.3558e-16, 2.1063e-16, 5.1258e-16, 7.6766e-18, 8.7294e-17,\n 2.0032e-18, 5.9397e-16, 3.5808e-17, 2.0392e-17, 1.0384e-17, 7.2548e-18,\n 5.2915e-17, 8.4739e-18, 1.1050e-16, 1.2284e-17, 3.9298e-18, 6.4223e-17,\n 3.3936e-17, 5.4584e-18, 5.6131e-18, 3.1045e-17, 2.5787e-17, 1.1093e-17,\n 9.1999e-18, 1.2364e-17, 1.4595e-17, 2.0006e-17, 6.5579e-17, 1.5190e-17,\n 1.6044e-17, 9.4474e-18, 5.6556e-18, 2.6409e-17, 7.1613e-17, 9.4653e-17,\n 7.9142e-18, 6.2130e-17, 1.0751e-17, 8.1702e-17, 1.6083e-31, 1.3388e-32,\n 3.7503e-32, 8.2900e-33, 8.0968e-34, 2.2523e-32, 1.1336e-32, 3.4485e-32,\n 2.6079e-34, 2.4995e-32, 5.2338e-33, 1.0749e-32, 4.8178e-32, 1.8546e-33,\n 1.0926e-33, 2.8625e-33, 7.0027e-33, 2.3546e-33, 8.7283e-32, 1.7134e-32,\n 1.6439e-33, 1.5098e-34, 3.9211e-33, 2.6510e-32, 2.5388e-33, 5.9079e-34,\n 9.1985e-34, 6.5022e-33, 1.3916e-34, 4.7014e-33, 2.0991e-32, 1.9823e-32,\n 1.2198e-32, 6.8070e-34, 3.9919e-33, 4.6345e-33, 1.4152e-32, 3.1293e-33,\n 9.9585e-33, 2.2070e-32, 1.4005e-32, 8.6225e-33, 2.9880e-33, 2.4926e-32,\n 2.4644e-32, 4.1293e-33, 1.9203e-33, 3.8669e-33, 2.5207e-32, 7.5962e-33,\n 2.8675e-33, 6.7068e-34, 9.3428e-33, 1.0416e-32, 2.4558e-32, 2.5900e-32,\n 1.8471e-32, 1.0747e-31, 1.4019e-32, 1.0807e-32, 3.8348e-32, 1.9836e-32,\n 2.2561e-32, 2.1901e-32, 5.4401e-33, 6.9685e-33, 1.1271e-33, 3.7498e-33,\n 1.3707e-32, 3.7060e-33, 1.8769e-32, 1.2292e-33, 3.1528e-34, 1.1723e-32,\n 3.0592e-33, 1.0146e-32, 6.4828e-33, 5.8939e-33, 1.8038e-32, 7.5828e-33,\n 7.2342e-33, 5.7099e-32, 3.2952e-33, 4.6883e-33, 9.8562e-33, 6.2023e-34,\n 5.7902e-33, 1.6466e-34, 5.5781e-33, 4.6797e-34, 2.2861e-33, 1.4022e-32,\n 7.5280e-34, 6.1506e-33, 1.9526e-33, 5.2267e-33, 8.2570e-33, 4.0035e-33,\n 3.1994e-33, 6.8677e-33, 6.6010e-33, 1.4501e-33, 2.3473e-32, 2.5955e-32,\n 3.7139e-33, 9.4298e-33, 6.4531e-33, 2.7945e-32, 1.1135e-32, 3.7402e-33,\n 1.0933e-33, 5.9796e-33, 1.4568e-32, 1.4228e-33, 6.3385e-33, 4.3767e-34,\n 2.2032e-33, 6.1794e-33, 1.0177e-33, 3.2321e-33, 4.0331e-33, 5.0205e-33,\n 1.8243e-32, 7.3019e-33, 2.9211e-32, 4.1276e-32, 2.1191e-32, 5.8677e-32,\n 1.1416e-33, 1.1358e-33, 1.1397e-32, 4.1951e-33, 4.4069e-34, 1.1051e-34,\n 4.1413e-33, 1.0577e-33, 4.8861e-33, 3.0837e-33, 1.6079e-32, 2.0953e-33,\n 3.9034e-33, 4.2712e-32, 4.1939e-33, 7.3265e-34, 1.8655e-33, 5.7507e-34,\n 5.6648e-34, 6.9657e-33, 1.1938e-32, 1.3230e-32, 7.4248e-34, 1.8903e-32,\n 1.9559e-33, 1.8236e-33, 4.5624e-32, 9.0885e-33, 1.4720e-33, 6.1597e-34,\n 4.0239e-33, 1.7480e-33, 9.0262e-33, 2.8544e-34, 6.2573e-33, 1.4354e-33,\n 2.3581e-32, 2.9306e-33, 5.4753e-33, 2.0638e-32, 8.2916e-33, 4.0796e-33,\n 1.7169e-32, 6.5633e-33, 1.1090e-32, 1.0702e-33, 1.0336e-31, 1.4290e-33,\n 2.1881e-32, 3.4320e-33, 4.5739e-33, 1.1827e-32, 4.2376e-34, 2.7000e-33,\n 1.1441e-32, 7.7833e-33, 6.9742e-33, 6.2573e-33, 1.2045e-32, 2.2775e-33,\n 8.7310e-33, 9.4182e-33, 9.5657e-33, 8.9459e-33, 1.9631e-32, 3.3354e-32,\n 5.3400e-33, 1.4718e-33, 1.6724e-32, 4.6439e-33, 4.0113e-34, 1.0849e-33,\n 1.2692e-32, 2.8912e-32, 1.1660e-32, 5.1016e-33, 1.4064e-33, 2.4596e-34,\n 9.8020e-33, 1.6612e-32, 2.4862e-33, 9.6699e-34, 2.7463e-33, 3.7634e-33,\n 4.7316e-33, 2.7281e-33, 9.8180e-33, 1.3779e-32, 2.8228e-32, 2.8121e-33,\n 2.9537e-33, 7.9298e-33, 1.0029e-32, 6.3365e-33, 6.1498e-34, 1.8913e-32,\n 6.0549e-34, 2.7657e-33, 1.0074e-32, 9.6537e-34, 1.2174e-32, 1.8629e-32,\n 2.9036e-33, 1.8802e-32, 1.8509e-32, 1.0866e-32, 8.9254e-33, 1.1770e-32,\n 2.7740e-34, 8.1361e-33, 2.5167e-33, 6.8772e-33, 1.0289e-33, 3.4355e-33,\n 6.5864e-34, 4.2337e-32, 6.7981e-33, 1.0916e-32, 1.3107e-33, 3.9191e-33,\n 1.1438e-32, 4.3939e-32, 1.0480e-32, 1.8734e-32, 2.2582e-32, 1.7532e-32,\n 2.8138e-33, 3.7370e-33, 6.6286e-13, 2.4674e-14, 1.4530e-13, 9.5486e-15,\n 3.7574e-14, 1.2510e-14, 2.5602e-15, 5.2528e-14, 2.9003e-14, 1.1940e-14,\n 4.1460e-13, 8.0861e-15, 2.4152e-14, 3.2255e-13, 4.7450e-14, 6.4385e-14,\n 1.0707e-14, 3.9714e-16, 2.7190e-14, 3.8208e-14, 8.0428e-14, 5.8218e-14,\n 3.5079e-14, 8.6694e-15, 8.5247e-16, 6.9844e-15, 2.0000e-15, 5.0212e-14,\n 4.5444e-13, 1.3000e-14, 1.4527e-13, 3.4302e-14, 3.2568e-14, 8.2358e-14,\n 2.4153e-16, 1.9879e-13, 1.0179e-13, 3.2042e-14, 2.0812e-13, 4.9997e-15,\n 8.1662e-15, 1.5770e-14, 1.4811e-14, 4.0217e-14, 2.2891e-14, 6.8776e-16,\n 4.8923e-15, 5.2271e-14, 8.6383e-15, 1.0008e-13, 3.4314e-13, 7.2272e-14,\n 7.5964e-15, 1.6565e-13, 2.1202e-14, 1.1541e-13, 1.0886e-13, 1.7656e-14,\n 1.5656e-13, 1.3145e-14, 5.6586e-15, 1.0089e-16, 1.6368e-15, 2.7037e-15,\n 6.7283e-14, 4.0473e-14, 3.0891e-14, 1.2878e-14, 4.1062e-14, 6.0950e-15,\n 8.4267e-14, 1.8199e-13, 1.1101e-13, 2.7940e-15, 4.8161e-15, 1.4575e-13,\n 1.0492e-13, 2.8136e-13, 1.2647e-13, 9.3406e-15, 5.2454e-15, 1.0827e-14,\n 5.1697e-14, 1.9729e-14, 4.8920e-14, 9.0150e-15, 5.9413e-14, 6.8814e-15,\n 1.7527e-14, 1.9057e-13, 2.6496e-13, 2.1579e-14, 9.3692e-14, 6.4214e-14,\n 3.7937e-14, 9.4608e-14, 9.3037e-14, 6.2512e-15, 3.9151e-14, 3.1728e-14,\n 2.6386e-14, 2.5316e-13, 3.4589e-14, 2.1038e-13, 1.0887e-13, 2.0173e-14,\n 4.1976e-14, 4.9233e-15, 1.0403e-14, 1.0096e-15, 1.1278e-13, 1.1733e-14,\n 1.1014e-13, 1.5387e-14, 2.6704e-13, 6.7709e-14, 4.3636e-14, 2.3193e-13,\n 3.7297e-14, 2.2913e-14, 2.7127e-15, 3.6345e-14, 5.4532e-14, 1.3259e-14,\n 1.3403e-14, 5.5905e-14, 1.1113e-14, 5.8148e-15, 1.3471e-13, 5.4671e-15,\n 7.6242e-15, 8.6972e-15, 9.4632e-15, 3.7769e-14, 5.2557e-14, 2.3657e-14,\n 2.3373e-14, 1.5390e-15, 4.9349e-14, 3.4440e-14, 5.9683e-15, 3.0300e-14,\n 1.0621e-14, 6.6430e-15, 6.8680e-14, 1.5882e-14, 1.6931e-16, 4.5751e-14,\n 3.4375e-14, 1.0620e-14, 9.4867e-14, 5.3798e-14, 1.7807e-14, 7.3182e-15,\n 1.7954e-13, 6.3919e-15, 6.5098e-15, 7.3054e-14, 2.7214e-14, 6.5996e-14,\n 6.3313e-14, 2.2623e-14, 9.7414e-14, 5.8489e-14, 4.8636e-15, 9.3510e-15,\n 1.2444e-16, 1.0849e-14, 6.8767e-14, 9.9613e-17, 3.5455e-13, 1.8147e-16,\n 2.1420e-14, 8.5375e-15, 1.9411e-13, 7.4940e-14, 3.3329e-14, 7.0230e-14,\n 1.8022e-13, 2.7443e-14, 3.1021e-15, 1.2850e-14, 2.1357e-14, 1.1475e-14,\n 3.7665e-15, 8.2377e-15, 1.4743e-14, 3.3548e-15, 1.5681e-13, 9.4284e-16,\n 3.4141e-14, 7.0651e-15, 2.4999e-13, 1.2239e-13, 5.1164e-15, 1.7292e-14,\n 9.6007e-14, 4.7877e-15, 4.8039e-15, 1.1341e-13, 2.6893e-14, 5.5490e-14,\n 1.6385e-14, 7.5474e-14, 8.4246e-15, 1.9134e-14, 9.0050e-14, 4.2267e-15,\n 7.3612e-14, 3.4650e-14, 1.2216e-14, 5.5219e-14, 2.6607e-13, 5.4126e-15,\n 2.4734e-14, 4.6023e-14, 2.1064e-16, 3.8325e-14, 3.5099e-14, 8.3042e-14,\n 3.3392e-15, 5.7199e-14, 1.2090e-14, 1.9046e-14, 5.3889e-14, 2.1417e-16,\n 8.5955e-14, 1.4433e-14, 1.1962e-13, 4.0495e-14, 5.5785e-14, 1.0088e-14,\n 2.6292e-14, 9.4973e-15, 1.6467e-13, 3.2897e-14, 2.2399e-14, 1.5670e-13,\n 1.4136e-13, 1.3518e-13, 2.8630e-14, 1.3703e-13, 4.9768e-15, 5.8462e-16,\n 1.3095e-14, 6.1774e-15, 3.8025e-15, 8.5779e-15, 1.0775e-13, 3.5553e-15,\n 9.3325e-15, 4.8846e-14, 1.8036e-14, 6.6844e-15, 2.7958e-15, 1.2582e-13],\n device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.1978e-13, 5.8178e-14, 8.3276e-14, ..., 1.0607e-14, 6.4743e-14,\n 1.1955e-13],\n [2.2694e-14, 5.8987e-15, 8.8311e-15, ..., 1.1760e-15, 6.5368e-15,\n 1.1852e-14],\n [2.4722e-14, 6.3736e-15, 9.2015e-15, ..., 1.1324e-15, 7.3380e-15,\n 1.3708e-14],\n [2.5901e-14, 7.1510e-15, 9.7372e-15, ..., 1.2282e-15, 7.7324e-15,\n 1.4356e-14]], device='cuda:0')" }, "41": { - "step": "tensor(15024.)", - "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.3362e-14, 9.7789e-15, 3.1522e-15, ..., 5.0424e-14, 1.7409e-15,\n 1.9647e-14],\n [6.3028e-15, 2.5797e-15, 1.4856e-15, ..., 1.1854e-14, 5.6312e-16,\n 1.0294e-14],\n [1.4633e-15, 8.6494e-17, 3.6241e-16, ..., 4.6503e-16, 1.1620e-16,\n 4.8688e-16],\n ...,\n [5.3478e-15, 3.8623e-16, 1.4412e-15, ..., 2.3776e-15, 3.3236e-16,\n 1.2122e-15],\n [2.7646e-15, 4.5941e-16, 6.8656e-16, ..., 2.3325e-15, 1.7383e-16,\n 5.6295e-16],\n [6.7096e-15, 2.0109e-15, 1.5540e-15, ..., 8.6975e-15, 5.8815e-16,\n 6.2578e-15]], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.5114e-12, 5.6788e-13, 6.1447e-13, 6.5638e-13], device='cuda:0')" }, "42": { - "step": "tensor(15024.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.9381e-13, 1.5156e-13, 1.3399e-15, 5.8396e-13, 1.8035e-13, 1.5537e-14,\n 2.1147e-14, 3.2561e-14, 3.6870e-15, 8.0496e-14, 1.0349e-13, 1.1830e-13,\n 1.7621e-13, 3.5232e-14, 1.9453e-13, 8.7536e-14, 3.7180e-13, 5.6249e-14,\n 5.7222e-15, 4.5377e-13, 5.7645e-14, 3.9596e-14, 1.1889e-13, 4.7274e-13,\n 2.5836e-14, 6.4481e-13, 1.1542e-13, 1.2006e-13, 6.2787e-15, 3.7353e-14,\n 5.3756e-13, 1.3597e-14, 3.1503e-13, 1.0281e-14, 2.0820e-13, 4.2743e-15,\n 8.0245e-14, 1.7017e-13, 1.0459e-13, 9.8806e-14, 6.5893e-15, 1.3844e-13,\n 4.7636e-13, 1.2810e-14, 2.2394e-13, 1.6641e-14, 1.2776e-14, 4.7796e-15,\n 1.4501e-13, 4.5287e-14, 5.5226e-14, 3.0475e-14, 2.3391e-14, 6.2930e-14,\n 9.6386e-13, 1.0118e-14, 2.8044e-14, 1.4152e-14, 3.0458e-15, 2.4219e-13,\n 7.5356e-14, 2.7358e-15, 1.5814e-13, 6.5105e-13, 2.6669e-14, 4.0481e-14,\n 2.9152e-13, 4.5613e-15, 9.1081e-13, 9.3221e-15, 1.3076e-13, 9.1903e-14,\n 6.2205e-14, 3.2509e-15, 2.1921e-14, 2.8368e-14, 1.5376e-13, 5.2475e-14,\n 8.3884e-14, 1.6189e-13, 1.7557e-14, 1.6779e-14, 1.5072e-12, 5.0548e-14,\n 3.3598e-15, 6.2889e-15, 1.2867e-14, 6.7566e-15, 1.3181e-13, 1.0382e-14,\n 3.5063e-15, 2.6299e-13, 1.7793e-14, 5.0245e-13, 1.1924e-13, 1.5884e-14,\n 3.6842e-13, 1.3314e-13, 6.0866e-13, 4.5519e-15, 6.9226e-14, 6.1695e-13,\n 8.1015e-13, 1.3545e-12, 1.6823e-14, 5.8993e-14, 3.5309e-15, 2.1556e-13,\n 1.0543e-13, 5.3326e-15, 9.4565e-15, 1.0251e-13, 1.1565e-14, 1.7782e-14,\n 1.5628e-14, 8.4123e-14, 8.7636e-15, 6.8210e-14, 7.6818e-15, 6.1388e-14,\n 4.9630e-13, 2.8864e-15, 1.2223e-12, 4.5983e-14, 8.8846e-13, 8.1606e-15,\n 2.9097e-13, 3.5726e-13, 4.1322e-13, 3.4306e-14, 4.3862e-13, 3.3363e-13,\n 5.3444e-14, 4.0217e-15, 5.3448e-15, 4.7245e-13, 4.1278e-13, 2.8735e-13,\n 5.0912e-14, 7.9680e-15, 8.6560e-14, 5.7018e-13, 1.4800e-13, 2.2134e-13,\n 1.1574e-13, 2.5815e-15, 1.4533e-13, 1.0446e-13, 9.8505e-15, 4.2214e-15,\n 2.2917e-15, 3.8223e-13, 1.4731e-14, 4.6185e-14, 3.0061e-13, 6.3178e-13,\n 2.0223e-13, 4.6102e-13, 5.2917e-14, 3.9037e-13, 4.1579e-13, 3.5450e-13,\n 1.6224e-13, 5.6022e-13, 2.8445e-15, 7.0580e-13, 7.7496e-14, 8.9343e-14,\n 5.1035e-14, 7.1107e-15, 1.4517e-14, 3.3413e-15, 7.4503e-14, 2.1685e-14,\n 1.0512e-13, 3.3109e-13, 1.7753e-13, 5.8317e-15, 1.6792e-13, 3.7423e-15,\n 7.7538e-15, 2.2362e-13, 3.9778e-15, 9.2419e-14, 5.7402e-15, 1.4386e-13,\n 2.3855e-13, 6.6059e-14, 1.0710e-13, 2.3041e-13, 4.1730e-13, 2.5695e-13,\n 2.5218e-13, 4.6049e-15, 3.5097e-14, 5.2765e-15, 7.2809e-15, 5.9550e-14,\n 2.6943e-15, 7.8823e-14, 2.4075e-13, 2.7252e-13, 3.5292e-14, 1.0440e-13,\n 3.3259e-14, 5.4827e-13, 5.7592e-15, 3.9813e-13, 1.6174e-14, 3.7016e-14,\n 1.1361e-14, 9.6007e-14, 1.0427e-14, 5.5777e-14, 4.1733e-13, 6.6476e-14,\n 3.4094e-14, 4.7466e-15, 1.1135e-14, 3.2240e-14, 1.8914e-13, 8.3931e-15,\n 3.8529e-15, 8.6891e-14, 2.2028e-13, 1.1946e-12, 1.3189e-13, 2.7842e-14,\n 2.3149e-14, 6.6671e-13, 2.3307e-13, 4.4445e-14, 7.4037e-13, 2.1791e-14,\n 3.3090e-14, 8.0399e-14, 3.4222e-14, 1.9944e-13, 3.4857e-14, 6.0540e-16,\n 3.1833e-13, 2.0448e-14, 2.8988e-13, 3.6903e-14, 5.6965e-15, 1.0653e-14,\n 3.5634e-13, 1.3597e-14, 5.1568e-13, 6.0085e-13, 3.3606e-13, 6.1075e-14,\n 1.2708e-13, 1.0518e-14, 1.5520e-14, 1.0828e-13], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.1978e-13, 5.8178e-14, 8.3276e-14, ..., 1.0607e-14, 6.4743e-14,\n 1.1955e-13],\n [2.2694e-14, 5.8987e-15, 8.8311e-15, ..., 1.1760e-15, 6.5368e-15,\n 1.1852e-14],\n [2.4722e-14, 6.3736e-15, 9.2015e-15, ..., 1.1324e-15, 7.3380e-15,\n 1.3708e-14],\n [2.5901e-14, 7.1510e-15, 9.7372e-15, ..., 1.2282e-15, 7.7324e-15,\n 1.4356e-14]], device='cuda:0')" }, "43": { - "step": "tensor(15024.)", - "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.1789e-13, 7.4095e-13, 9.2588e-14, ..., 1.9178e-12, 1.8071e-12,\n 8.3559e-13],\n [7.8652e-15, 5.0606e-14, 5.9840e-15, ..., 1.3333e-13, 1.2995e-13,\n 5.5978e-14],\n [5.4892e-15, 3.1846e-14, 4.3003e-15, ..., 8.2943e-14, 7.3756e-14,\n 3.8631e-14],\n [8.6083e-15, 5.7671e-14, 6.8485e-15, ..., 1.4769e-13, 1.4319e-13,\n 6.1172e-14],\n [8.4917e-15, 4.8583e-14, 6.7482e-15, ..., 1.2606e-13, 1.1142e-13,\n 5.9735e-14]], device='cuda:0')" - }, - "44": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([4.2139e-11, 2.9756e-12, 1.7463e-12, 3.3413e-12, 2.6287e-12],\n device='cuda:0')" - }, - "45": { - "step": "tensor(15024.)", - "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.1808e-13, 7.4129e-13, 9.2731e-14, ..., 1.9191e-12, 1.8071e-12,\n 8.3686e-13],\n [7.8690e-15, 5.0613e-14, 5.9867e-15, ..., 1.3335e-13, 1.2995e-13,\n 5.6002e-14],\n [5.5078e-15, 3.1877e-14, 4.3140e-15, ..., 8.3064e-14, 7.3756e-14,\n 3.8752e-14],\n [8.6128e-15, 5.7679e-14, 6.8517e-15, ..., 1.4772e-13, 1.4319e-13,\n 6.1201e-14],\n [8.5235e-15, 4.8638e-14, 6.7718e-15, ..., 1.2627e-13, 1.1142e-13,\n 5.9945e-14]], device='cuda:0')" - }, - "46": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([4.2142e-11, 2.9757e-12, 1.7466e-12, 3.3414e-12, 2.6292e-12],\n device='cuda:0')" - }, - "47": { - "step": "tensor(15024.)", - "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.1789e-13, 7.4095e-13, 9.2588e-14, ..., 1.9178e-12, 1.8071e-12,\n 8.3559e-13],\n [7.8652e-15, 5.0606e-14, 5.9840e-15, ..., 1.3333e-13, 1.2995e-13,\n 5.5978e-14],\n [5.4892e-15, 3.1846e-14, 4.3003e-15, ..., 8.2943e-14, 7.3756e-14,\n 3.8631e-14],\n [8.6083e-15, 5.7671e-14, 6.8485e-15, ..., 1.4769e-13, 1.4319e-13,\n 6.1172e-14],\n [8.4917e-15, 4.8583e-14, 6.7482e-15, ..., 1.2606e-13, 1.1142e-13,\n 5.9735e-14]], device='cuda:0')" - }, - "48": { - "step": "tensor(15024.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([4.2139e-11, 2.9756e-12, 1.7463e-12, 3.3413e-12, 2.6287e-12],\n device='cuda:0')" - }, - "6": { - "step": "tensor(6260.)", - "exp_avg": "tensor([[ 1.3580e-06, -3.5045e-06, 2.5812e-06, ..., -4.4712e-06,\n 3.5317e-06, -2.9484e-06],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 7.9142e-17, -6.4181e-17, 1.0515e-17, ..., 3.8766e-17,\n 6.9982e-17, 1.3136e-18],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.5620e-08, 1.4753e-08, 2.8627e-09, ..., 1.5496e-09, 1.9261e-09,\n 2.1679e-09],\n [4.1372e-13, 1.5587e-13, 1.6427e-14, ..., 3.3962e-14, 1.1404e-13,\n 1.7047e-14],\n [3.1195e-12, 3.6182e-13, 1.9616e-13, ..., 6.0013e-14, 4.3665e-13,\n 3.3489e-14],\n ...,\n [2.8430e-11, 1.2244e-10, 2.1425e-11, ..., 1.5088e-11, 1.6014e-11,\n 1.2388e-11],\n [1.7339e-12, 3.4031e-13, 2.9272e-13, ..., 3.2417e-14, 9.0515e-13,\n 9.4395e-15],\n [2.4094e-12, 2.8855e-12, 1.0601e-13, ..., 1.2391e-13, 4.0752e-13,\n 3.0613e-14]], device='cuda:0')" - }, - "7": { - "step": "tensor(6260.)", - "exp_avg": "tensor([2.1816e-05, 5.7453e-44, 5.6052e-45, ..., 5.6052e-45, 9.8458e-16,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.0349e-06, 6.5905e-11, 3.6901e-10, ..., 2.7046e-08, 8.7395e-11,\n 6.7145e-10], device='cuda:0')" + "step": "tensor(17528.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.5114e-12, 5.6788e-13, 6.1447e-13, 6.5638e-13], device='cuda:0')" }, "8": { - "step": "tensor(6260.)", - "exp_avg": "tensor([[-1.0743e-07, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 2.8922e-18, -5.6052e-45],\n [-4.3107e-07, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -3.3245e-17, 5.6052e-45],\n [-3.7153e-07, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 4.2623e-18, -5.6052e-45],\n ...,\n [-1.6203e-06, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 4.0408e-18, -5.6052e-45],\n [ 2.4014e-07, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -3.9954e-18, -5.6052e-45],\n [ 3.5377e-07, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.8931e-18, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.2137e-10, 2.4383e-13, 4.8485e-13, ..., 6.4746e-12, 9.2502e-13,\n 7.2239e-13],\n [1.2870e-10, 2.9037e-13, 5.4296e-13, ..., 7.9502e-12, 1.2171e-12,\n 1.2238e-12],\n [1.1602e-10, 2.4762e-13, 5.0051e-13, ..., 1.4623e-11, 1.3617e-12,\n 1.5762e-12],\n ...,\n [2.3771e-10, 4.2176e-13, 1.0072e-12, ..., 1.0827e-11, 2.1526e-12,\n 2.2893e-12],\n [1.3587e-10, 5.9602e-13, 9.6503e-13, ..., 7.0445e-12, 2.1643e-12,\n 1.7156e-12],\n [1.9252e-10, 2.2450e-13, 5.0488e-13, ..., 9.2692e-12, 1.4715e-12,\n 2.6787e-12]], device='cuda:0')" + "step": "tensor(16276.)", + "exp_avg": "tensor([[-3.4364e-08, -9.2831e-08, 1.5523e-13, ..., -6.3607e-07,\n -1.3220e-06, -7.8396e-08],\n [ 2.2508e-08, 1.8523e-08, 3.2556e-11, ..., -2.4038e-06,\n -4.5014e-06, 1.4241e-07],\n [-6.1799e-08, 1.0531e-07, 1.8474e-13, ..., 6.2407e-08,\n -7.7672e-08, 7.4684e-08],\n ...,\n [ 3.3594e-07, 2.7919e-07, -8.3245e-20, ..., -1.2542e-08,\n 6.1589e-07, -2.2348e-09],\n [ 1.5321e-07, 7.8496e-09, -6.2522e-11, ..., -4.4860e-07,\n 8.1141e-07, 1.8201e-06],\n [-6.0562e-07, -2.8920e-08, -2.0487e-09, ..., -1.4022e-07,\n 2.5388e-08, 4.7622e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.7144e-11, 8.6221e-12, 2.3319e-14, ..., 9.7159e-12, 1.4987e-11,\n 3.0018e-11],\n [4.5613e-11, 5.4808e-12, 1.1080e-14, ..., 6.0550e-12, 1.6417e-10,\n 1.1534e-11],\n [2.5768e-11, 7.5705e-12, 1.7028e-12, ..., 7.5281e-12, 2.2777e-11,\n 1.3847e-11],\n ...,\n [1.9068e-11, 3.9390e-11, 6.3317e-15, ..., 7.0090e-13, 1.5150e-10,\n 7.6307e-13],\n [2.3078e-11, 7.0974e-12, 2.1177e-12, ..., 1.2967e-11, 3.4253e-11,\n 6.1767e-12],\n [5.7234e-11, 1.5028e-11, 5.7234e-12, ..., 5.4826e-11, 9.5138e-13,\n 6.3277e-13]], device='cuda:0')" }, "9": { - "step": "tensor(6260.)", - "exp_avg": "tensor([[ 2.1911e-05, -1.6794e-05, 4.9263e-06, ..., 7.0992e-06,\n 1.1760e-06, -1.3618e-06],\n [-8.2102e-08, -4.1431e-08, -6.1162e-08, ..., -1.3579e-08,\n -3.2129e-08, 1.2339e-08],\n [-1.2575e-05, 1.0503e-05, -5.0192e-06, ..., 1.0514e-06,\n -7.2553e-06, 4.0623e-06],\n ...,\n [-3.7279e-06, 3.8383e-06, -1.3771e-06, ..., 3.9692e-06,\n 5.6483e-06, 3.8086e-06],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 2.1790e-05, -1.9865e-07, 2.6076e-06, ..., 7.9457e-06,\n 2.0450e-06, 1.9448e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.0423e-08, 4.5684e-09, 8.0826e-10, ..., 7.7086e-10, 1.2169e-09,\n 5.2207e-10],\n [1.2813e-09, 8.9943e-10, 1.1543e-10, ..., 1.6185e-10, 1.4719e-10,\n 1.3916e-10],\n [4.7626e-09, 4.2152e-09, 6.9804e-10, ..., 4.7971e-10, 1.4073e-09,\n 4.3562e-10],\n ...,\n [1.8905e-09, 4.6136e-09, 4.9647e-10, ..., 6.6191e-10, 6.3644e-10,\n 4.6638e-10],\n [2.1721e-09, 1.4585e-09, 1.6568e-10, ..., 1.9495e-10, 2.3719e-10,\n 1.2959e-10],\n [6.7633e-09, 3.5405e-09, 8.1927e-10, ..., 2.0514e-09, 1.1147e-09,\n 8.5538e-10]], device='cuda:0')" + "step": "tensor(16276.)", + "exp_avg": "tensor([-1.7535e-05, -1.2534e-05, 3.1191e-06, ..., 3.0679e-06,\n 1.2635e-05, 1.6326e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.7333e-09, 2.0176e-09, 1.6992e-09, ..., 2.0147e-09, 1.0689e-09,\n 1.9781e-09], device='cuda:0')" }, "10": { - "step": "tensor(6260.)", - "exp_avg": "tensor([ 2.0188e-04, 1.9911e-06, -2.0124e-04, ..., 1.2104e-05,\n 5.6052e-45, 3.8372e-04], device='cuda:0')", - "exp_avg_sq": "tensor([1.4562e-06, 2.9553e-07, 9.7905e-07, ..., 8.9326e-07, 2.9173e-07,\n 2.2973e-06], device='cuda:0')" + "step": "tensor(16276.)", + "exp_avg": "tensor([[-4.3566e-08, 2.6244e-07, -5.5053e-08, ..., -1.0841e-08,\n 1.3778e-07, 1.8915e-07],\n [-2.1215e-08, 5.2248e-08, 2.2414e-07, ..., 5.5742e-08,\n -1.9444e-07, -3.9794e-08],\n [-4.9139e-08, -1.0579e-07, -6.1052e-08, ..., -2.6699e-07,\n -1.1428e-07, -1.3690e-07],\n ...,\n [-1.2979e-07, 2.9015e-07, 3.8181e-07, ..., 7.4835e-08,\n 1.9361e-07, 1.5663e-07],\n [-8.9143e-08, -2.3857e-07, 5.3293e-08, ..., -1.1994e-07,\n -1.2381e-07, 5.7326e-08],\n [-9.5380e-08, 2.1658e-07, -1.9441e-07, ..., 7.2200e-08,\n 1.7228e-07, 2.2568e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.9922e-13, 4.5582e-13, 1.2490e-12, ..., 5.1921e-13, 6.9179e-13,\n 7.2088e-13],\n [6.1311e-13, 1.2740e-12, 9.4645e-13, ..., 1.2565e-12, 9.6572e-13,\n 9.8398e-13],\n [1.2130e-12, 1.8475e-12, 9.8965e-13, ..., 1.2489e-12, 1.4801e-12,\n 1.7515e-12],\n ...,\n [9.7099e-13, 1.6387e-12, 1.1496e-12, ..., 1.3926e-12, 1.2988e-12,\n 1.7597e-12],\n [7.4917e-13, 1.6134e-12, 1.0166e-12, ..., 1.5747e-12, 7.9692e-13,\n 8.7584e-13],\n [7.0653e-13, 1.8064e-12, 1.5091e-12, ..., 2.6273e-12, 1.3800e-12,\n 8.0870e-13]], device='cuda:0')" }, "11": { - "step": "tensor(6260.)", - "exp_avg": "tensor([[ 8.8573e-07, 7.7643e-09, 2.0079e-07, ..., -3.7528e-07,\n -5.6052e-45, 2.7903e-07],\n [-1.6968e-06, -2.6760e-09, 1.2471e-06, ..., -8.9085e-07,\n -5.6052e-45, 8.8673e-07],\n [ 1.6751e-06, 2.0632e-09, 7.1978e-07, ..., -4.0868e-08,\n -5.6052e-45, -1.5715e-06],\n ...,\n [-6.3517e-07, 4.2686e-09, 1.8244e-06, ..., -3.1623e-07,\n 5.6052e-45, -3.6818e-07],\n [-6.4864e-07, 1.9920e-09, -1.2747e-06, ..., 6.7823e-08,\n -5.6052e-45, 1.1517e-06],\n [-2.4732e-06, 9.3278e-10, -8.9123e-07, ..., -8.3761e-07,\n -5.6052e-45, -7.1626e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.0093e-10, 1.5431e-11, 3.4014e-11, ..., 2.1362e-11, 1.5190e-11,\n 6.0224e-11],\n [9.8124e-11, 2.7816e-11, 5.5354e-11, ..., 5.9353e-11, 2.7408e-11,\n 5.5761e-11],\n [1.7947e-10, 3.1413e-11, 6.5317e-11, ..., 5.0744e-11, 2.8535e-11,\n 5.9453e-11],\n ...,\n [2.4780e-10, 2.8416e-11, 6.3586e-11, ..., 5.7939e-11, 3.0434e-11,\n 5.2552e-11],\n [1.4494e-10, 3.6085e-11, 6.3743e-11, ..., 4.0601e-11, 3.4507e-11,\n 7.3608e-11],\n [1.2538e-10, 3.3822e-11, 6.4830e-11, ..., 6.5413e-11, 2.9750e-11,\n 6.9547e-11]], device='cuda:0')" + "step": "tensor(15024.)", + "exp_avg": "tensor([[ 8.9125e-07, 4.9028e-07, 1.0837e-13, ..., -3.0119e-07,\n -3.1830e-07, 1.3053e-09],\n [-2.6223e-07, 7.7601e-08, 1.2293e-15, ..., 5.9846e-08,\n -1.9305e-08, -3.6038e-07],\n [ 1.0779e-06, 2.6104e-08, 3.4124e-14, ..., 1.1705e-06,\n 2.1372e-06, 6.2161e-16],\n ...,\n [-9.5957e-07, -4.1943e-08, -2.0585e-13, ..., 3.8321e-07,\n 1.2033e-07, -1.9039e-09],\n [ 6.3138e-08, -2.8106e-08, -4.9812e-16, ..., -7.5387e-07,\n 1.1148e-06, -9.5673e-11],\n [ 7.8423e-09, -4.0979e-09, 1.8948e-07, ..., 5.7422e-07,\n -3.5406e-07, 2.5700e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.2888e-12, 2.2728e-11, 6.5493e-13, ..., 2.3384e-11, 1.0713e-11,\n 1.1176e-11],\n [2.3834e-11, 3.4965e-13, 1.2475e-13, ..., 1.0582e-11, 4.1227e-11,\n 2.1571e-11],\n [3.2925e-11, 1.0267e-11, 7.8385e-14, ..., 4.6308e-12, 5.9622e-11,\n 1.9099e-13],\n ...,\n [1.0711e-10, 7.2182e-13, 1.4087e-13, ..., 1.1933e-12, 2.2104e-11,\n 6.3355e-13],\n [3.4738e-12, 2.9826e-12, 1.7343e-13, ..., 8.8601e-11, 2.9773e-10,\n 1.7209e-12],\n [2.4329e-12, 1.7478e-11, 1.5732e-12, ..., 7.0101e-11, 1.9348e-11,\n 9.0413e-13]], device='cuda:0')" }, "12": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[-2.7538e-06, 3.6483e-06, 3.9246e-06, ..., 3.3478e-06,\n -7.2741e-08, 8.4253e-08],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-1.7676e-05, 9.5842e-06, -9.4442e-07, ..., -4.2620e-06,\n -8.3650e-06, 1.1469e-06],\n ...,\n [-8.8349e-06, 9.7421e-06, -3.1404e-06, ..., -4.2425e-07,\n -3.7831e-06, -1.5755e-06],\n [ 3.1605e-05, -3.0500e-05, 6.2374e-06, ..., 3.6976e-06,\n 1.4000e-05, -5.2827e-06],\n [-7.5985e-06, 5.4368e-06, -1.2171e-06, ..., 1.1464e-05,\n 6.2879e-07, -9.5634e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.0568e-10, 9.0237e-10, 1.3172e-10, ..., 2.3996e-10, 1.7070e-10,\n 1.1732e-10],\n [2.5778e-11, 7.0474e-11, 7.3739e-16, ..., 3.9139e-12, 7.7569e-12,\n 4.2439e-12],\n [1.8447e-09, 7.8232e-10, 1.3948e-10, ..., 2.0912e-10, 2.6828e-10,\n 8.8962e-11],\n ...,\n [1.9542e-09, 7.7890e-10, 8.9360e-11, ..., 1.3177e-10, 4.4129e-10,\n 8.5544e-11],\n [1.1049e-09, 1.2446e-09, 2.1593e-10, ..., 1.3428e-10, 4.3925e-10,\n 8.0107e-11],\n [1.0302e-09, 1.0059e-09, 1.6025e-10, ..., 1.4430e-10, 3.1971e-10,\n 2.1210e-10]], device='cuda:0')" + "step": "tensor(15024.)", + "exp_avg": "tensor([-2.7423e-06, -5.6893e-06, 3.0159e-06, ..., 2.4269e-06,\n 3.6942e-06, 1.0367e-06], device='cuda:0')", + "exp_avg_sq": "tensor([6.7650e-10, 8.2475e-10, 1.3495e-09, ..., 1.4000e-09, 1.6227e-09,\n 1.0370e-09], device='cuda:0')" }, "13": { - "step": "tensor(3756.)", - "exp_avg": "tensor([ 1.4594e-05, 5.6052e-45, -1.5783e-04, ..., -1.1433e-04,\n 3.5018e-04, -3.1295e-04], device='cuda:0')", - "exp_avg_sq": "tensor([2.5368e-07, 3.4849e-08, 2.3720e-07, ..., 2.3299e-07, 2.4226e-07,\n 4.4061e-07], device='cuda:0')" - }, - "14": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[-4.1473e-07, -5.6052e-45, 1.4058e-07, ..., -1.5282e-07,\n -4.1470e-07, 5.2540e-07],\n [-6.8656e-07, -5.6052e-45, -3.5424e-07, ..., -3.2235e-08,\n -2.1843e-07, 6.5575e-09],\n [-3.4350e-08, -5.6052e-45, 1.3350e-07, ..., -4.7222e-07,\n -1.0342e-08, 1.6750e-07],\n ...,\n [-4.2450e-07, -5.6052e-45, -1.8081e-07, ..., 2.2132e-07,\n -2.8695e-07, -1.7869e-07],\n [-7.7851e-08, 5.6052e-45, 3.5046e-07, ..., -2.0683e-07,\n -3.6419e-07, 7.5492e-07],\n [ 5.7120e-08, -5.6052e-45, 1.8796e-07, ..., -2.0358e-07,\n -5.2259e-07, -3.7004e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.1651e-12, 1.1987e-11, 3.8784e-12, ..., 2.4096e-12, 2.2528e-12,\n 2.6398e-12],\n [6.7580e-12, 1.2098e-12, 8.5860e-12, ..., 5.9261e-12, 3.1785e-12,\n 9.3709e-12],\n [9.7870e-12, 1.8606e-11, 1.0684e-11, ..., 5.1725e-12, 4.9137e-12,\n 2.4106e-11],\n ...,\n [1.0626e-11, 2.0825e-11, 1.0238e-11, ..., 1.6180e-12, 4.0025e-12,\n 5.3788e-12],\n [1.1614e-11, 2.0569e-11, 1.2569e-11, ..., 5.4767e-12, 4.4078e-12,\n 1.0310e-11],\n [1.2487e-11, 1.9828e-13, 9.9231e-12, ..., 5.6168e-12, 3.2918e-12,\n 1.0860e-11]], device='cuda:0')" + "step": "tensor(15024.)", + "exp_avg": "tensor([[-7.9367e-08, 1.7078e-08, -3.8408e-08, ..., 1.1653e-07,\n 4.7572e-08, -7.3217e-08],\n [-8.2658e-09, 3.7713e-07, -2.6337e-07, ..., -2.4492e-07,\n -5.9590e-08, -1.9650e-08],\n [-1.1100e-07, 2.9449e-08, 1.7999e-07, ..., -6.5590e-07,\n 1.1236e-07, -4.0995e-07],\n ...,\n [ 7.8783e-08, -5.0421e-09, 6.6947e-08, ..., 1.9735e-07,\n 4.0283e-08, -2.6306e-08],\n [ 2.6509e-07, 6.3269e-08, 1.8410e-07, ..., -1.3782e-07,\n 7.2980e-08, 1.3325e-07],\n [ 1.3417e-07, 1.0353e-07, 1.1305e-08, ..., -1.3308e-07,\n 1.9464e-08, -7.7689e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.3494e-13, 1.2996e-13, 2.4216e-12, ..., 2.3557e-13, 1.9092e-13,\n 1.6765e-13],\n [3.1151e-13, 3.0217e-13, 3.9348e-12, ..., 8.3523e-13, 2.2964e-13,\n 2.4445e-13],\n [4.5705e-13, 1.9643e-13, 3.5904e-12, ..., 2.0664e-12, 3.4560e-13,\n 2.8505e-13],\n ...,\n [5.2785e-13, 2.9429e-13, 8.0793e-13, ..., 4.4747e-13, 3.7463e-13,\n 3.3272e-13],\n [5.2244e-13, 5.2012e-13, 7.4633e-13, ..., 1.5388e-12, 2.6889e-13,\n 3.4625e-13],\n [4.7913e-13, 3.2800e-13, 5.2512e-13, ..., 8.9269e-13, 2.4899e-13,\n 3.0325e-13]], device='cuda:0')" } }, "param_groups": [ { "lr": 0.008535680352542143, - "name": "scale_256", + "name": "shared", "betas": [ 0.9, 0.999 @@ -263,13 +238,12 @@ "initial_lr": 0.01, "params": [ 0, - 1, - 2 + 1 ] }, { "lr": 0.008535680352542143, - "name": "scale_512", + "name": "scale_384", "betas": [ 0.9, 0.999 @@ -285,9 +259,9 @@ "decoupled_weight_decay": true, "initial_lr": 0.01, "params": [ + 2, 3, - 4, - 5 + 4 ] }, { @@ -308,9 +282,9 @@ "decoupled_weight_decay": true, "initial_lr": 0.01, "params": [ + 5, 6, - 7, - 8 + 7 ] }, { @@ -331,9 +305,9 @@ "decoupled_weight_decay": true, "initial_lr": 0.01, "params": [ + 8, 9, - 10, - 11 + 10 ] }, { @@ -354,9 +328,9 @@ "decoupled_weight_decay": true, "initial_lr": 0.01, "params": [ + 11, 12, - 13, - 14 + 13 ] }, { @@ -377,6 +351,7 @@ "decoupled_weight_decay": true, "initial_lr": 0.005, "params": [ + 14, 15, 16, 17, @@ -405,12 +380,7 @@ 40, 41, 42, - 43, - 44, - 45, - 46, - 47, - 48 + 43 ] } ] @@ -443,20 +413,26 @@ ] }, "metrics": { - "val_acc": 74.622 + "val_acc": 81.654 }, "train_config": { "name": "david_training", - "run_id": "20251012_032356", + "run_id": "20251012_041353", "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", - "model_variant": "clip_vit_b16", + "model_variant": "clip_vit_l14", "num_classes": 1000, - "preset": "high_accuracy", + "preset": "clip_vit_l14", "custom_config_path": null, "num_classes_override": null, "use_belly_override": null, "belly_expand_override": null, "progressive_training_override": true, + "scale_warmup_epochs_override": { + "384": 0, + "768": 1, + "1024": 2, + "1280": 3 + }, "num_epochs": 20, "batch_size": 1024, "learning_rate": 0.01, @@ -473,8 +449,8 @@ "gradient_clip": 5.0, "scheduler_type": "cosine_restarts", "min_lr": 1e-06, - "freeze_strategy": "performance", - "freeze_threshold": 70.0, + "freeze_strategy": "never", + "freeze_threshold": 90.0, "unfreeze_on_plateau": true, "patience": 10, "track_gradients": true,