AbstractPhil commited on
Commit
b4eb156
·
verified ·
1 Parent(s): 760e9d4

Upload weights and configs - Run 20251012_060013

Browse files
weights/best_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da3fe18b58dcc60496b6dae7a4b533511f6ac5f4075a4a2701aa879481b2a188
3
- size 2628344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f94cb043f3e87c5d250a453926dd14b701bdde034e1ed7d0d3ca723de7a315e2
3
+ size 325845164
weights/best_model_metadata.json CHANGED
@@ -1,46 +1,36 @@
1
  {
2
- "epoch": 9,
3
  "optimizer_state_dict": {
4
  "state": {
5
  "0": {
6
- "step": "tensor(12520.)",
7
- "exp_avg": "tensor([[ 1.5781e-04, -3.5266e-04, -9.7583e-05, ..., 4.1940e-05,\n 5.6975e-05, 5.5798e-05],\n [ 9.8742e-05, -1.3119e-04, 1.3677e-05, ..., -1.9366e-05,\n 1.0349e-04, 5.0592e-05],\n [-2.9130e-04, 1.9895e-04, -9.0260e-05, ..., -9.9918e-05,\n -1.8629e-04, -6.0961e-05],\n ...,\n [ 5.7664e-05, 4.7397e-05, 1.2529e-04, ..., 2.8799e-05,\n -5.4763e-05, -6.6750e-06],\n [ 6.3787e-05, -2.1633e-05, 6.6595e-05, ..., 2.7704e-05,\n 8.9652e-05, -4.2091e-06],\n [ 1.5883e-04, -1.1030e-04, 6.1721e-05, ..., 1.1175e-04,\n -8.8536e-06, 8.7643e-05]], device='cuda:0')",
8
- "exp_avg_sq": "tensor([[1.6698e-07, 2.8303e-07, 5.0525e-08, ..., 4.0017e-08, 8.8370e-08,\n 5.3000e-08],\n [2.5771e-07, 4.5601e-07, 5.9020e-08, ..., 4.0411e-08, 1.4522e-07,\n 7.0853e-08],\n [2.3775e-07, 4.7696e-07, 6.2997e-08, ..., 6.3045e-08, 5.9060e-08,\n 4.6280e-08],\n ...,\n [1.4690e-07, 4.0507e-07, 4.7863e-08, ..., 4.8895e-08, 4.3604e-08,\n 3.9252e-08],\n [2.7245e-07, 2.1035e-07, 4.9765e-08, ..., 6.8004e-08, 5.1936e-08,\n 4.4202e-08],\n [1.1928e-07, 1.4219e-07, 3.6373e-08, ..., 5.2237e-08, 4.5473e-08,\n 3.9891e-08]], device='cuda:0')"
9
  },
10
  "1": {
11
- "step": "tensor(12520.)",
12
- "exp_avg": "tensor([ 6.3563e-03, 2.5102e-03, -2.3939e-03, 6.9390e-04, -5.5107e-03,\n 1.8048e-03, -1.3423e-03, 1.2744e-03, 1.2543e-03, 2.8361e-03,\n -1.7246e-03, 2.1835e-03, -1.4989e-03, -3.1949e-03, -1.2977e-03,\n 2.1179e-03, -5.9291e-03, -9.2005e-04, 2.8861e-03, 3.0686e-03,\n 3.6523e-03, -7.2715e-04, 4.4408e-03, 2.2455e-03, -2.2390e-03,\n -3.4705e-04, -1.1323e-04, -2.6541e-03, -9.0658e-05, -2.0350e-03,\n 6.4861e-04, 5.6058e-05, 4.3530e-04, 3.0172e-03, -1.1393e-03,\n 1.5743e-03, -1.9657e-03, 6.6744e-03, -6.2427e-04, -6.2990e-03,\n -6.6796e-03, 2.2028e-04, -7.6008e-04, -1.3117e-03, -6.5553e-04,\n 2.9044e-03, 3.5508e-03, 1.5549e-03, 2.6519e-04, 1.7346e-03,\n 3.6852e-04, 5.0304e-04, 1.9452e-04, 7.0303e-04, 2.2807e-03,\n 4.2914e-03, 1.0266e-03, 2.2294e-03, -1.1829e-03, 1.3515e-03,\n 2.0346e-03, -2.9442e-04, -3.9549e-04, 2.2319e-03, -2.4483e-04,\n -2.5583e-03, 4.4849e-03, 1.4333e-03, -4.4578e-03, -2.0497e-03,\n 1.1495e-02, 3.5215e-03, 1.2701e-04, -9.1266e-04, 6.6057e-04,\n -2.2992e-03, 3.9980e-03, -7.1079e-05, 1.1422e-03, -2.3329e-03,\n 3.9809e-03, -6.3117e-04, -4.7632e-03, -2.8593e-03, 5.2501e-03,\n -9.9090e-03, -8.7446e-05, -2.1033e-03, 1.3585e-03, -7.1950e-04,\n 1.5056e-03, 2.0026e-03, 2.2293e-03, -2.6758e-03, -9.9534e-04,\n 4.4668e-03, 1.8737e-03, 1.8360e-03, 3.8190e-03, 1.9203e-04,\n -4.6890e-03, 1.8226e-03, 1.8874e-03, 3.6358e-03, 1.3807e-03,\n -7.7577e-04, -7.9496e-04, 1.3677e-04, 8.1841e-04, -3.1814e-03,\n 1.8048e-03, 3.3553e-03, -4.9102e-03, 1.3808e-03, 5.1207e-03,\n 1.6721e-03, -4.7491e-03, -2.6198e-03, 2.8556e-03, 1.8389e-03,\n -1.3770e-05, -5.7796e-03, 1.0991e-03, 3.1372e-03, 4.8819e-03,\n -5.4394e-04, -1.4880e-03, -7.2622e-03, -2.2927e-04, 4.5269e-04,\n -1.1851e-04, -4.7925e-03, 4.2491e-03, -2.0320e-04, -6.5289e-04,\n -1.1051e-03, -1.5166e-03, -2.3110e-03, 1.2267e-03, 2.2656e-03,\n -1.2047e-03, 3.9875e-03, 1.6500e-03, -2.7301e-03, -1.6858e-03,\n 2.3919e-03, 3.4813e-03, 1.6305e-03, 2.4193e-03, 2.3856e-03,\n -7.6614e-04, 2.1073e-03, -7.2462e-03, -1.5324e-03, -3.6564e-03,\n 3.1190e-03, 4.2763e-03, 7.9173e-03, 8.0900e-04, -1.2533e-03,\n -9.9852e-03, -7.8643e-04, -1.5498e-03, 1.8211e-03, -1.4373e-03,\n -3.4651e-04, 1.3923e-03, -9.1538e-04, -2.9207e-04, -1.1321e-03,\n 9.9711e-04, 2.4242e-03, 1.2174e-03, -6.6163e-04, -1.1129e-03,\n 4.4727e-03, 3.5576e-05, 1.1692e-03, 1.4891e-03, 7.9591e-04,\n 4.2842e-03, 2.2273e-03, -1.0673e-02, 3.6652e-03, -1.1921e-03,\n 5.6809e-04, -2.4059e-04, -1.0264e-03, -1.3917e-04, -3.2738e-04,\n 6.8789e-04, -1.4678e-02, 1.9791e-03, 4.1252e-03, 6.2255e-04,\n -1.4504e-03, -1.8916e-03, 2.2912e-03, 2.5363e-04, 1.0106e-03,\n -2.3781e-04, -5.7982e-03, -6.7601e-05, 2.1338e-03, 1.2260e-03,\n 8.8970e-04, 9.6591e-04, 1.3678e-03, -1.1165e-03, 1.3344e-03,\n 7.4076e-04, 1.4390e-03, -2.3686e-03, -5.5181e-03, -1.3431e-03,\n 6.3643e-04, -6.6143e-03, 1.6457e-03, -4.4675e-04, 3.8409e-04,\n -2.1455e-03, -8.0203e-04, 4.7499e-04, 7.5643e-04, 2.8416e-03,\n 1.2082e-03, -2.3145e-03, -4.4622e-03, 9.9061e-04, -5.4417e-04,\n -1.5471e-03, -2.9405e-03, 7.3630e-04, 4.1378e-03, -4.1548e-03,\n 4.9746e-03, 3.6228e-03, 3.5885e-03, 8.3581e-04, 1.0634e-03,\n -4.0015e-03, 2.1256e-03, -1.1948e-03, 6.5337e-04, -4.4733e-04,\n -5.3818e-03, 1.0360e-03, -4.6869e-03, 1.4853e-03, 1.2977e-03,\n 1.3324e-03, -3.2049e-03, 2.3267e-03, 5.8186e-04, 2.6390e-03,\n -6.4317e-04, 1.1045e-03, 3.9773e-03, 1.3893e-03, 1.8098e-03,\n -4.4831e-03, -1.4275e-04, 4.2212e-03, -6.8254e-05, -1.2112e-04,\n -7.4728e-03, -4.4847e-03, 1.6913e-03, -2.3447e-03, -2.3030e-03,\n -3.8929e-04, 1.3200e-03, -4.6651e-03, -1.2995e-03, -8.0070e-04,\n 2.1448e-03, 1.0197e-03, 3.9658e-04, 1.1312e-03, 4.3220e-03,\n 1.2821e-03, -5.3671e-03, 2.4647e-03, -8.2521e-04, 1.5489e-03,\n 4.2633e-03, -1.3165e-03, 3.1668e-03, 2.0352e-03, -1.0567e-03,\n 5.2512e-03, -7.3721e-03, -1.3819e-03, -3.1426e-04, 2.8566e-03,\n 1.9785e-03, -2.3924e-03, -1.6155e-03, 2.4685e-03, -9.6952e-05,\n 1.0270e-03, -4.7090e-03, 4.4791e-03, 1.5376e-05, 5.2302e-04,\n -9.0432e-04, 1.0867e-03, -1.2149e-03, 4.6787e-04, -4.2822e-03,\n -5.6963e-03, 7.2298e-04, -2.1089e-03, -2.5055e-03, -2.6269e-03,\n -1.9026e-03, 1.1965e-03, -2.7991e-03, -1.5413e-03, 1.5862e-03,\n -3.0942e-04, -1.0107e-03, -4.9279e-04, -3.0776e-03, 2.7953e-03,\n -6.1656e-03, -7.3021e-04, -4.3944e-03, -3.6002e-04, 1.9508e-04,\n -1.0083e-03, 1.0220e-03, 2.3339e-03, -2.4182e-03, 4.1325e-04,\n 1.8667e-03, 3.6274e-03, 1.1361e-03, -6.2754e-04, -1.9324e-04,\n -3.6546e-04, 3.1481e-04, 3.7374e-03, 1.3475e-03, 2.9976e-03,\n -7.3433e-05, 2.9237e-03, -2.1311e-03, -7.1621e-03, 9.0230e-05,\n 4.4070e-03, -2.4974e-03, 2.1867e-03, 1.6351e-03, 9.3232e-04,\n 1.6370e-03, -2.1578e-03, -6.9997e-04, -9.2883e-04, 9.8079e-04,\n -8.8313e-04, 6.4415e-04, 1.3825e-03, -5.7022e-04, -1.4116e-03,\n 1.3109e-03, -3.5973e-04, -9.7172e-03, 2.9686e-04, -1.5405e-03,\n -8.0118e-04, 4.2690e-03, -1.8635e-03, -4.1665e-04, 3.4896e-03,\n 1.3644e-03, 3.2192e-03, 4.5342e-03, -4.4682e-03, 1.7362e-03,\n 1.7646e-03, 5.7927e-05, -6.0777e-03, 4.1298e-04, -3.8661e-04,\n -4.4611e-04, 3.9875e-04, -6.9911e-04, 4.4768e-03, 1.8348e-04,\n 3.3966e-03, 2.6140e-03, -8.2243e-03, -6.8521e-04, -2.9733e-04,\n 4.7474e-03, -3.4884e-03, 4.5268e-03, -8.1612e-04, -7.6521e-04,\n 2.2630e-03, 2.9607e-03, 2.9986e-03, 2.7078e-03, -2.5313e-03,\n -5.4122e-03, 2.9183e-03, -1.5998e-03, -3.4358e-03, -2.9575e-04,\n -2.0512e-03, 1.3428e-03, 1.0281e-03, -1.3787e-03, 3.1142e-03,\n -7.7688e-04, -2.9966e-05, -2.5548e-03, 2.4785e-03, 1.2744e-04,\n 9.5441e-04, -2.6103e-03, 1.7985e-03, 3.4749e-03, -8.4321e-08,\n 1.6383e-03, -2.7722e-04, 6.5776e-03, 2.4065e-04, 3.0879e-03,\n -7.4350e-03, -8.4307e-04, -7.2875e-04, 1.0637e-03, -2.5961e-03,\n 8.5130e-04, -1.6339e-03, 4.0686e-04, 6.7186e-04, 5.8664e-04,\n -5.3609e-03, -1.3997e-04, 3.3515e-03, -8.5538e-04, 1.2525e-03,\n 8.4309e-04, -4.8851e-03, 1.0791e-03, 1.6683e-03, 1.8069e-03,\n 6.6044e-04, 1.4694e-03, -1.7793e-03, -3.6584e-05, 2.1455e-03,\n -5.9628e-05, -3.8228e-03, -8.0960e-05, -2.9003e-03, -1.4925e-03,\n 1.1623e-03, 5.0000e-04, 1.6481e-03, -6.5982e-03, 3.4492e-04,\n -2.4010e-03, 9.0007e-05, -1.1398e-04, -9.2671e-04, -1.2297e-03,\n 1.7663e-03, -1.6166e-03, -4.4402e-03, 1.3900e-04, -6.9665e-03,\n -3.2943e-03, -1.8867e-03, -3.2147e-03, 9.4666e-04, -9.1608e-04,\n 2.3092e-03, 9.5321e-04, -1.8345e-03, 4.1769e-04, 1.1811e-03,\n -1.9273e-03, 8.5545e-03, -6.6741e-04, -3.4244e-03, 3.2111e-03,\n 9.0464e-04, 1.4275e-03, 1.1919e-03, -3.3361e-03, -1.6580e-03,\n -7.3152e-03, -5.0109e-04, 3.6467e-03, 6.5521e-07, 3.3706e-03,\n -3.8867e-04, -4.0687e-04, 2.5767e-03, -1.3249e-03, -5.5586e-04,\n -2.2303e-03, 5.3057e-03, 2.2691e-03, -4.4590e-03, 1.3507e-03,\n -7.4353e-04, 5.9935e-03], device='cuda:0')",
13
- "exp_avg_sq": "tensor([7.2512e-05, 1.1019e-04, 9.9248e-05, 5.5612e-05, 6.7121e-05, 1.0031e-04,\n 5.9773e-05, 5.1705e-05, 6.4033e-05, 5.6169e-05, 6.8238e-05, 1.6573e-05,\n 1.0072e-04, 1.4606e-04, 5.2493e-05, 1.0045e-04, 8.4599e-05, 6.0681e-05,\n 6.1493e-05, 9.0432e-05, 6.7030e-05, 7.0840e-05, 1.1957e-04, 4.8057e-05,\n 1.4041e-04, 7.1113e-05, 7.8978e-05, 6.9880e-05, 5.9417e-05, 5.7770e-05,\n 8.7513e-05, 7.1569e-05, 6.7041e-05, 6.2300e-05, 1.3242e-04, 5.0675e-05,\n 1.4801e-04, 7.9039e-05, 4.1354e-05, 7.4328e-05, 7.0986e-05, 7.9117e-05,\n 8.5124e-05, 6.4362e-05, 7.4290e-05, 6.6766e-05, 8.6604e-05, 4.0079e-05,\n 4.4369e-05, 7.3125e-05, 2.3740e-05, 7.4951e-05, 5.3456e-05, 1.2581e-04,\n 6.3913e-05, 7.1525e-05, 6.8563e-05, 5.9110e-05, 4.7262e-05, 1.0414e-04,\n 1.0640e-04, 8.7026e-05, 6.8453e-05, 2.7108e-05, 6.4277e-05, 6.4376e-05,\n 7.4997e-05, 9.0205e-05, 8.2962e-05, 8.7746e-05, 1.1170e-04, 9.4883e-05,\n 7.4454e-05, 6.6858e-05, 5.9579e-05, 1.0697e-04, 8.3371e-05, 6.6815e-05,\n 5.7789e-05, 9.6536e-05, 1.0008e-04, 6.2869e-05, 5.7528e-05, 8.2585e-05,\n 5.8706e-05, 1.0831e-04, 1.1401e-04, 1.3857e-04, 1.9935e-05, 6.5200e-05,\n 1.0378e-04, 1.3060e-04, 9.3773e-05, 5.6892e-05, 6.7640e-05, 4.9256e-05,\n 9.2273e-05, 2.9546e-05, 6.7948e-05, 9.2812e-05, 4.7165e-05, 4.8418e-05,\n 6.8732e-05, 8.0735e-05, 7.0170e-05, 1.1922e-04, 9.8124e-05, 6.6470e-05,\n 7.6156e-05, 5.9479e-05, 5.6384e-05, 7.0079e-05, 5.9407e-05, 1.3077e-04,\n 5.7598e-05, 7.4382e-05, 8.1519e-05, 5.9187e-05, 6.5026e-05, 5.9065e-05,\n 6.2860e-05, 1.0174e-04, 7.1195e-05, 5.0764e-05, 5.7857e-05, 6.4192e-05,\n 6.1678e-05, 7.2441e-05, 7.3560e-05, 4.9425e-05, 6.4231e-05, 9.7638e-05,\n 9.1556e-05, 6.4483e-05, 5.9572e-05, 1.0801e-04, 8.4543e-05, 8.1038e-05,\n 6.6712e-05, 6.6604e-05, 9.2933e-05, 6.4318e-05, 8.5129e-05, 7.6727e-05,\n 7.6444e-05, 1.0942e-04, 8.7810e-05, 7.8066e-05, 9.8640e-05, 8.4970e-05,\n 7.0386e-05, 4.5215e-05, 6.9736e-05, 4.8611e-05, 8.0937e-05, 7.8415e-05,\n 9.6527e-05, 6.2934e-05, 1.2635e-04, 9.8119e-05, 1.0523e-04, 5.4049e-05,\n 6.6602e-05, 9.7183e-05, 3.0206e-05, 5.5845e-05, 1.0028e-04, 6.6852e-05,\n 7.3802e-05, 7.6700e-05, 5.7336e-05, 9.6055e-05, 7.6344e-05, 1.0505e-04,\n 7.6953e-05, 8.1130e-05, 8.5807e-05, 5.1258e-05, 4.4070e-05, 9.9288e-05,\n 1.8838e-04, 8.6845e-05, 6.1611e-05, 1.0970e-04, 6.3698e-05, 5.3190e-05,\n 7.1777e-05, 6.9004e-05, 5.6626e-05, 7.1177e-05, 6.9374e-05, 1.4085e-04,\n 8.0837e-05, 6.9333e-05, 8.4503e-05, 8.3074e-05, 6.1865e-05, 5.7168e-05,\n 6.1964e-05, 6.7875e-05, 7.5165e-05, 6.0273e-05, 6.2879e-05, 1.1144e-04,\n 5.1099e-05, 4.3593e-05, 6.0414e-05, 5.3644e-05, 6.1797e-05, 9.7747e-05,\n 6.1278e-05, 7.1418e-05, 8.3301e-05, 5.3395e-05, 1.4802e-04, 6.1757e-05,\n 7.9729e-05, 7.4921e-05, 4.8466e-05, 6.7437e-05, 8.6289e-05, 1.1120e-04,\n 1.0087e-04, 5.7920e-05, 8.3983e-05, 8.7964e-05, 9.8691e-05, 8.5369e-05,\n 6.4883e-05, 7.9041e-05, 6.0690e-05, 9.2641e-05, 6.0072e-05, 5.4724e-05,\n 4.6984e-05, 7.3237e-05, 9.9077e-05, 1.1399e-04, 8.2742e-05, 7.2863e-05,\n 7.8897e-05, 5.9803e-05, 6.4471e-05, 5.7344e-05, 5.6070e-05, 1.3041e-04,\n 6.6416e-05, 8.8161e-05, 5.2970e-05, 6.4837e-05, 5.6148e-05, 5.0703e-05,\n 8.3795e-05, 5.7469e-05, 8.0018e-05, 7.3807e-05, 7.5782e-05, 1.1732e-04,\n 7.1639e-05, 8.2258e-05, 7.8587e-05, 6.4899e-05, 1.2533e-04, 5.7355e-05,\n 6.2201e-05, 7.6339e-05, 6.1350e-05, 6.5109e-05, 4.5595e-05, 1.1440e-04,\n 4.2570e-05, 5.6618e-05, 8.4310e-05, 5.3958e-05, 6.7097e-05, 7.2834e-05,\n 6.8835e-05, 4.9115e-05, 8.2634e-05, 6.2827e-05, 1.1426e-04, 8.8805e-05,\n 8.5880e-05, 9.3828e-05, 6.4395e-05, 1.1473e-04, 4.3674e-05, 8.1672e-05,\n 6.1591e-05, 3.6387e-05, 8.3369e-05, 2.0206e-04, 7.8578e-05, 6.4915e-05,\n 7.8079e-05, 5.1374e-05, 1.3915e-04, 1.2021e-04, 8.2560e-05, 8.5514e-05,\n 5.4398e-05, 7.0501e-05, 8.9104e-05, 1.0418e-04, 1.2797e-04, 7.5903e-05,\n 7.4508e-05, 4.2443e-05, 6.1746e-05, 6.7860e-05, 5.8586e-05, 6.9703e-05,\n 5.9039e-05, 8.4074e-05, 6.1156e-05, 5.9695e-05, 4.6840e-05, 5.5527e-05,\n 8.3250e-05, 8.8701e-05, 5.5929e-05, 1.1869e-04, 1.6266e-04, 1.5948e-04,\n 4.7998e-05, 6.0044e-05, 5.6220e-05, 8.8606e-05, 4.1672e-05, 6.8777e-05,\n 5.7954e-05, 8.8043e-05, 7.8968e-05, 6.7892e-05, 5.7282e-05, 6.2787e-05,\n 7.5525e-05, 8.0414e-05, 1.1319e-04, 9.2808e-05, 5.8435e-05, 6.5216e-05,\n 5.4113e-05, 8.3818e-05, 8.2172e-05, 9.7351e-05, 6.6038e-05, 7.9793e-05,\n 8.1784e-05, 9.9960e-05, 6.0991e-05, 6.2078e-05, 6.4862e-05, 8.8328e-05,\n 7.8428e-05, 6.7388e-05, 4.5181e-05, 8.6514e-05, 7.8928e-05, 6.4784e-05,\n 6.2382e-05, 4.9389e-05, 9.9456e-05, 6.1233e-05, 7.9262e-05, 9.6575e-05,\n 7.0088e-05, 1.2153e-04, 5.3392e-05, 3.9271e-05, 1.4991e-04, 6.4224e-05,\n 9.3520e-05, 3.2669e-05, 1.1926e-04, 1.6915e-04, 7.5040e-05, 6.2417e-05,\n 8.0370e-05, 3.7825e-05, 7.3172e-05, 6.6714e-05, 1.7721e-04, 8.2638e-05,\n 1.1069e-04, 5.3679e-05, 5.0091e-05, 9.0835e-05, 1.0999e-04, 1.5140e-04,\n 8.8529e-05, 5.0547e-05, 8.3844e-05, 4.8599e-05, 6.3352e-05, 7.7482e-05,\n 9.3653e-05, 5.5084e-05, 5.7589e-05, 9.5163e-05, 1.5871e-04, 1.2744e-04,\n 6.9541e-05, 6.4151e-05, 7.0628e-05, 1.3157e-04, 6.4501e-05, 3.7270e-05,\n 8.6574e-05, 5.7349e-05, 8.2512e-05, 7.2937e-05, 7.7274e-05, 5.7988e-05,\n 8.3994e-05, 4.8179e-05, 9.4418e-05, 1.1850e-04, 5.1639e-05, 1.2304e-04,\n 7.3215e-05, 8.6787e-05, 8.2314e-05, 8.7038e-05, 7.4435e-05, 7.7177e-05,\n 7.2046e-05, 9.2039e-05, 8.2250e-05, 1.0159e-04, 8.9180e-05, 6.8441e-05,\n 8.5019e-05, 4.4265e-05, 6.2266e-05, 6.2828e-05, 1.0825e-04, 6.2090e-05,\n 6.6607e-05, 6.1238e-05, 1.1401e-04, 8.1602e-05, 9.3099e-05, 6.1156e-05,\n 7.3361e-05, 7.1485e-05, 9.7311e-05, 5.0915e-05, 7.6193e-05, 4.7340e-05,\n 7.3933e-05, 6.2318e-05, 7.6087e-05, 5.0242e-05, 7.3717e-05, 7.1194e-05,\n 5.5668e-05, 7.0081e-05, 6.7980e-05, 5.7673e-05, 1.0371e-04, 4.7619e-05,\n 5.7822e-05, 8.2032e-05, 6.2093e-05, 7.3440e-05, 1.2068e-04, 1.3620e-04,\n 1.1772e-04, 7.7936e-05, 1.1727e-04, 3.7691e-05, 5.7127e-05, 6.8972e-05,\n 6.2754e-05, 5.4889e-05, 5.8448e-05, 7.1129e-05, 1.1267e-04, 7.0763e-05,\n 9.7364e-05, 1.5761e-04, 6.3211e-05, 6.2182e-05, 8.9459e-05, 1.2892e-04,\n 1.4036e-04, 8.5110e-05, 1.2891e-04, 6.0253e-05, 1.0926e-04, 7.3221e-05,\n 5.9762e-05, 8.5187e-05, 8.6531e-05, 1.0131e-04, 5.0183e-05, 5.5968e-05,\n 7.5582e-05, 3.8386e-05, 6.1099e-05, 9.0729e-05, 5.2793e-05, 9.0854e-05,\n 7.4200e-05, 6.2664e-05, 8.1053e-05, 6.8687e-05, 1.0426e-04, 6.5999e-05,\n 9.0164e-05, 7.7003e-05], device='cuda:0')"
14
  },
15
  "2": {
16
- "step": "tensor(12520.)",
17
- "exp_avg": "tensor([ 5.9179e-03, 2.4288e-03, -3.1620e-03, 5.0009e-04, -6.5902e-03,\n 2.2251e-03, -1.0909e-03, 8.8967e-04, 2.2133e-03, 3.3763e-03,\n -3.7177e-03, 1.5845e-02, -2.0845e-03, -3.6139e-03, -2.7652e-03,\n 2.0113e-03, -9.7474e-03, -1.9262e-03, 4.5385e-03, 4.0117e-03,\n 4.6705e-03, -7.5213e-04, 3.7789e-03, 2.8298e-03, -3.2070e-03,\n 1.3596e-05, 3.3771e-04, -4.7682e-03, -6.4656e-04, -2.5916e-03,\n 2.6233e-04, -6.7067e-04, 3.5948e-04, 2.8759e-03, -1.5189e-03,\n 2.0348e-03, -1.9037e-03, 1.1980e-02, -1.9229e-03, -5.8410e-03,\n -1.0025e-02, -8.4091e-04, -1.4543e-03, -2.5279e-03, -7.3695e-04,\n 3.7865e-03, 4.7156e-03, 1.4225e-03, 1.1423e-03, 1.5592e-03,\n -8.1527e-06, 1.8039e-03, 5.4561e-04, 5.8288e-04, 4.2004e-03,\n 5.1436e-03, 1.2627e-03, 3.5551e-03, -2.5217e-03, 1.5950e-03,\n 2.1115e-03, -1.0162e-03, -1.7712e-03, 2.2889e-02, 1.4403e-05,\n -3.5693e-03, 7.3837e-03, 1.5567e-03, -4.1638e-03, -3.7842e-03,\n 1.1757e-02, 5.2155e-03, 1.6883e-04, -5.6254e-04, 1.4957e-03,\n -3.3904e-03, 3.5483e-03, 7.6258e-04, 2.1427e-03, -3.0355e-03,\n 9.3973e-03, -3.8613e-03, -1.6718e-02, -3.4134e-03, 1.0281e-02,\n -8.5164e-03, -4.5924e-04, -1.9213e-03, 5.6052e-45, -1.2091e-03,\n 2.8456e-03, 2.5367e-03, 2.0716e-03, -7.0803e-03, -1.4098e-03,\n 7.6939e-03, 1.5692e-03, 7.1461e-03, 5.3430e-03, -1.2514e-04,\n -7.2770e-03, 4.2862e-03, 1.8691e-03, 7.1632e-03, 2.0415e-03,\n -4.1226e-04, -4.7169e-04, -4.7672e-04, 1.4480e-03, -7.3167e-03,\n 2.9749e-03, 4.5858e-03, -5.6974e-03, 1.7546e-03, 7.0348e-03,\n 1.0602e-03, -4.3732e-03, -3.7841e-03, 3.3265e-03, 2.1837e-03,\n -1.3303e-03, -4.1551e-03, 1.1376e-03, 6.2110e-03, 7.3238e-03,\n 8.6742e-04, -2.5176e-03, -6.2455e-03, 1.5207e-04, -7.5959e-04,\n -3.7181e-04, -7.3135e-03, 4.5273e-03, -1.1502e-03, -2.5702e-03,\n -2.8237e-03, -1.5067e-03, -2.6923e-03, 1.0748e-03, 3.6037e-03,\n -1.5874e-03, 5.8147e-03, 1.9918e-03, -3.4507e-03, -2.8908e-03,\n 2.4687e-03, 6.2987e-03, 1.6668e-03, 3.5637e-03, 3.5042e-03,\n -1.8080e-06, 4.1910e-03, -8.0065e-03, -2.5221e-03, -2.5295e-03,\n 3.8011e-03, 4.3212e-03, 5.5294e-03, 1.5811e-03, -1.9477e-03,\n -1.0212e-02, -1.8673e-03, -4.1675e-03, 1.4097e-03, -2.8962e-03,\n -1.0165e-03, 4.9469e-04, -2.2209e-03, -5.6626e-04, -1.5930e-03,\n 1.5485e-04, 2.3632e-03, 1.1629e-03, -1.6872e-03, -3.1925e-03,\n 4.9390e-03, -5.4674e-04, 1.7492e-03, 2.6695e-03, 1.3416e-03,\n 3.2587e-03, 3.1850e-03, -1.9013e-02, 8.2295e-03, -9.7193e-04,\n 2.1936e-04, 3.5064e-04, -4.7841e-03, -6.5066e-04, 6.1262e-05,\n 8.8369e-04, -1.9847e-02, 4.5429e-03, 4.4810e-03, 2.6942e-04,\n -1.9815e-03, -3.6864e-03, 1.2684e-03, 4.0967e-04, 1.1086e-04,\n 1.2825e-05, -1.1649e-02, -4.6809e-04, 3.1181e-03, 3.6983e-03,\n 1.1450e-03, 1.0211e-03, 1.4666e-03, -1.7599e-03, 1.5627e-03,\n 2.3803e-03, 2.5947e-03, -4.0402e-03, -7.3903e-03, -1.4962e-03,\n 4.8489e-04, -7.3299e-03, 2.4453e-03, -1.4570e-03, -2.7618e-04,\n -3.1651e-03, -5.2647e-04, 2.7606e-04, 6.9595e-04, 4.4364e-03,\n 2.5898e-04, -3.1478e-03, -5.5785e-03, 2.3480e-03, -7.1072e-04,\n -1.0468e-03, -3.1532e-03, 2.2517e-03, 8.6322e-03, -1.1294e-02,\n 7.2642e-03, 4.9154e-03, 3.8961e-03, 1.5505e-05, 1.6201e-03,\n -3.5197e-03, 3.0113e-03, -1.4033e-03, 1.2875e-03, -7.7739e-04,\n -5.0398e-03, 1.5022e-03, -5.8564e-03, 4.1609e-03, 1.9251e-03,\n 1.7825e-03, -4.4920e-03, 3.2338e-03, 2.1431e-04, 3.6387e-03,\n -3.2419e-04, 1.7206e-03, 5.7629e-03, 1.1179e-03, 3.2301e-03,\n -6.6040e-03, -1.3769e-03, 3.1471e-03, -2.0679e-04, -6.3024e-04,\n -1.4908e-02, -6.1845e-03, 1.7791e-03, -2.5586e-03, -1.3206e-03,\n -8.2482e-04, 1.8436e-03, -3.3002e-03, -1.3945e-03, -1.9181e-03,\n 5.1308e-03, 1.9556e-03, 1.0338e-03, 2.6772e-04, 8.5433e-03,\n 1.8289e-03, -4.1966e-03, 4.0716e-03, -5.3167e-04, 2.6223e-03,\n 5.9214e-03, -2.4009e-03, 4.3300e-03, 3.9352e-03, -3.8254e-03,\n 4.4148e-03, -7.5865e-03, -1.2896e-03, -2.1571e-04, 3.7853e-03,\n 2.7859e-03, -5.3227e-03, -4.2058e-03, 1.6318e-03, -1.1382e-03,\n 1.8690e-03, -1.3206e-02, 2.7102e-03, -8.0671e-04, 3.8812e-04,\n -2.3706e-03, 1.1240e-03, -3.0376e-03, 1.1678e-03, -4.3038e-03,\n -1.2439e-02, 1.9518e-03, -2.5336e-03, -2.5811e-03, -2.9344e-03,\n -2.4017e-03, 3.0653e-03, -5.5375e-03, -1.3871e-03, 1.4715e-03,\n -1.4199e-03, -1.1652e-03, -2.5365e-03, -2.7541e-03, 6.8057e-03,\n -6.7668e-03, -1.8281e-03, -4.9102e-03, -1.0285e-03, -5.9168e-04,\n -2.3601e-03, 1.4646e-03, 2.9193e-03, -3.1496e-03, 7.2395e-04,\n 1.7736e-03, 4.3440e-03, 1.3586e-03, -4.1171e-04, 4.1991e-04,\n -5.1535e-04, 1.1736e-04, 6.2646e-03, 2.2715e-03, 2.6446e-03,\n -5.2332e-04, 3.1694e-03, -2.3462e-03, -7.8308e-03, -1.6162e-03,\n 5.5491e-03, -6.0800e-03, 4.8556e-03, 1.9386e-03, 6.7793e-04,\n 2.3286e-03, -5.3296e-03, -1.7029e-03, -1.3067e-03, 9.9814e-04,\n -1.8645e-03, 2.1908e-04, 9.6970e-04, -6.5636e-04, -1.2228e-03,\n 1.8147e-04, -7.1861e-04, -1.3943e-02, -4.1772e-04, -3.5339e-03,\n -1.2662e-03, 5.8686e-03, -1.3779e-03, -2.1644e-03, 3.6517e-03,\n 2.1144e-03, 2.9590e-03, 9.8132e-03, -3.6253e-03, 3.1243e-03,\n 1.5647e-03, 2.7748e-04, -5.5938e-03, 1.5962e-03, -1.0462e-03,\n 5.2084e-04, 5.0388e-04, -1.2982e-03, 6.0874e-03, 8.0412e-04,\n 5.2148e-03, 7.1381e-03, -8.1751e-03, -3.9765e-03, -1.4969e-03,\n 6.9788e-03, -3.6847e-03, 9.2441e-03, -6.6328e-04, 3.5143e-04,\n 2.1182e-03, 3.7152e-03, 4.0868e-03, 2.9959e-03, -2.1054e-03,\n -9.2702e-03, 4.7768e-03, -4.6514e-03, -4.9798e-03, -1.0931e-03,\n -1.8984e-03, 1.9416e-03, 4.8931e-04, -2.9233e-03, 3.3145e-03,\n -1.6660e-03, -3.4924e-04, -2.7322e-03, 6.1902e-03, 2.1998e-04,\n 1.6953e-03, -3.4511e-03, 2.0148e-03, 5.1786e-03, -6.4165e-04,\n 2.1296e-03, -1.2231e-03, 8.4293e-03, 1.4846e-03, 3.4701e-03,\n -7.1356e-03, -1.7724e-03, -8.6714e-04, 2.9067e-03, -2.0829e-03,\n 2.5659e-05, -3.8062e-03, 1.5016e-03, 1.0422e-03, 1.9606e-03,\n -8.8975e-03, -1.3261e-04, 4.0590e-03, -2.1499e-03, 1.6862e-03,\n 1.7875e-03, -8.2716e-03, 1.0825e-03, 1.7071e-03, 4.5010e-03,\n 1.6516e-03, 1.9078e-03, -2.1212e-03, 1.0434e-03, 6.1096e-03,\n 1.6403e-04, -5.2427e-03, -1.3559e-03, -4.6814e-03, -2.1426e-03,\n 6.5757e-04, 8.3465e-04, 2.6719e-03, -1.1288e-02, 3.7871e-04,\n -6.7030e-03, 1.6656e-04, 6.0154e-04, -1.3943e-03, -2.1548e-03,\n 2.8857e-03, -3.1166e-03, -5.5567e-03, 2.7103e-04, -1.2102e-02,\n -5.9273e-03, -2.8968e-03, -4.5755e-03, 1.0069e-03, -5.6810e-04,\n 2.3190e-03, 1.4321e-03, -3.0973e-03, -2.2337e-04, 1.8840e-03,\n -1.8492e-03, 7.6737e-03, -4.7839e-04, -3.2643e-03, 4.3095e-03,\n 1.2256e-03, 2.0274e-03, 1.8113e-03, -5.7962e-03, -1.3525e-03,\n -1.0404e-02, -5.1932e-04, 4.2320e-03, -1.3714e-04, 3.9255e-03,\n -1.3970e-04, -4.5891e-04, 4.0743e-03, -2.3996e-03, -4.1858e-04,\n -2.0692e-03, 8.3024e-03, 3.6214e-03, -6.1032e-03, 2.1308e-03,\n -1.6616e-03, 9.3752e-03], device='cuda:0')",
18
- "exp_avg_sq": "tensor([7.1175e-05, 1.4666e-04, 1.2013e-04, 1.1989e-04, 1.3125e-04, 9.5474e-05,\n 5.5966e-05, 9.5907e-05, 1.5575e-04, 1.3895e-04, 1.3559e-04, 1.1171e-03,\n 1.4501e-04, 1.5508e-04, 2.1205e-04, 7.2237e-05, 2.1174e-04, 2.2622e-04,\n 2.2056e-04, 1.1653e-04, 1.4728e-04, 8.5511e-05, 1.0299e-04, 6.7795e-05,\n 2.0711e-04, 8.8416e-05, 1.6652e-04, 1.3755e-04, 2.8306e-04, 2.6655e-04,\n 1.0102e-04, 1.5262e-04, 7.5848e-05, 6.6357e-05, 1.8992e-04, 9.9344e-05,\n 1.1777e-04, 1.9557e-04, 1.4310e-04, 9.0165e-05, 1.1001e-04, 1.1800e-04,\n 1.2269e-04, 1.4745e-04, 7.8662e-05, 1.1200e-04, 1.3440e-04, 1.7420e-04,\n 2.9178e-04, 8.0590e-05, 6.6216e-04, 2.9953e-04, 1.3417e-04, 8.2108e-05,\n 1.5118e-04, 1.1411e-04, 1.3849e-04, 1.8482e-04, 2.0128e-04, 2.1895e-04,\n 2.1034e-04, 1.2737e-04, 2.1742e-04, 7.0049e-03, 1.2139e-04, 1.3009e-04,\n 1.7836e-04, 1.4483e-04, 5.6493e-05, 2.1242e-04, 9.5870e-05, 1.9338e-04,\n 1.5128e-04, 1.0852e-04, 1.0317e-04, 2.8749e-04, 9.3913e-05, 3.8385e-04,\n 2.0008e-04, 1.5475e-04, 2.8746e-04, 5.1517e-04, 5.8982e-04, 1.2836e-04,\n 1.9666e-04, 8.2556e-05, 1.0065e-04, 1.7549e-04, 4.2788e-09, 2.3508e-04,\n 1.0242e-04, 2.1860e-04, 1.3710e-04, 1.5596e-04, 3.2840e-04, 1.4293e-04,\n 8.9726e-05, 4.8278e-04, 1.0964e-04, 2.5211e-04, 1.7257e-04, 9.3426e-05,\n 1.0423e-04, 3.5303e-04, 1.6504e-04, 7.8838e-05, 8.4499e-05, 9.1568e-05,\n 1.0345e-04, 1.8069e-04, 2.0385e-04, 1.1273e-04, 1.3993e-04, 6.7499e-05,\n 1.2605e-04, 1.1379e-04, 1.1638e-04, 1.2924e-04, 1.0915e-04, 5.0988e-05,\n 1.9766e-04, 9.7728e-05, 1.7015e-04, 2.0158e-04, 1.4570e-04, 8.8572e-05,\n 7.3807e-05, 7.1639e-05, 9.3599e-05, 2.1319e-04, 1.1915e-04, 1.8966e-04,\n 1.6046e-04, 2.4895e-04, 3.3983e-04, 1.9387e-04, 1.1589e-04, 1.2784e-04,\n 9.0809e-05, 2.2186e-04, 1.4346e-04, 2.0086e-04, 1.2570e-04, 1.6064e-04,\n 1.6037e-04, 1.4611e-04, 2.5658e-04, 1.3963e-04, 1.0155e-04, 1.5909e-04,\n 7.9474e-05, 1.7451e-04, 1.0883e-04, 1.5226e-04, 9.0137e-05, 1.4324e-04,\n 1.2482e-04, 5.6658e-05, 1.3776e-04, 1.0966e-04, 1.0330e-04, 1.0768e-04,\n 1.9002e-04, 1.0043e-04, 1.1680e-04, 1.7373e-04, 7.2426e-05, 2.2572e-04,\n 1.5279e-04, 2.1080e-04, 1.2814e-04, 1.9749e-04, 1.4092e-04, 1.2123e-04,\n 3.7377e-04, 1.2019e-04, 1.5099e-04, 2.5081e-04, 1.9434e-04, 2.0892e-04,\n 1.1456e-04, 1.5322e-04, 2.2800e-04, 4.3026e-04, 1.9856e-04, 1.0693e-04,\n 2.2683e-04, 3.8972e-04, 1.4191e-04, 1.8889e-04, 1.1399e-04, 2.3602e-04,\n 2.3678e-04, 7.1652e-05, 1.7209e-04, 7.8757e-05, 1.6825e-04, 1.1253e-04,\n 1.1208e-04, 7.7576e-05, 1.5685e-04, 2.7045e-04, 2.2336e-04, 1.4308e-04,\n 1.4151e-04, 1.9520e-04, 1.1178e-04, 1.1497e-04, 6.9835e-05, 9.4069e-05,\n 1.1073e-04, 2.8151e-04, 2.0439e-04, 1.1097e-04, 1.4920e-04, 1.4431e-04,\n 8.9549e-05, 2.1006e-04, 3.0436e-04, 1.6202e-04, 2.0814e-04, 1.3489e-04,\n 2.0014e-04, 1.6465e-04, 1.8940e-04, 5.5719e-05, 2.2733e-04, 9.9278e-05,\n 1.4673e-04, 1.0007e-04, 4.8407e-05, 1.1687e-04, 1.8682e-04, 2.3849e-04,\n 2.8017e-04, 1.3796e-04, 2.7824e-04, 2.2457e-04, 6.2522e-05, 8.8890e-05,\n 1.5642e-04, 7.5219e-05, 8.8979e-05, 1.9531e-04, 1.0501e-04, 1.1453e-04,\n 7.6857e-05, 1.3278e-04, 3.7375e-04, 1.3996e-04, 2.0352e-04, 1.7106e-04,\n 1.6363e-04, 1.1508e-04, 1.1408e-04, 1.5128e-04, 7.2099e-05, 2.3732e-04,\n 6.0973e-05, 1.1759e-04, 1.3243e-04, 2.3874e-04, 1.3828e-04, 9.0596e-05,\n 1.6268e-04, 2.4502e-04, 1.5468e-04, 7.7388e-05, 9.5793e-05, 1.8899e-04,\n 1.1937e-04, 9.3243e-05, 1.7991e-04, 1.0463e-04, 1.2113e-04, 4.6656e-04,\n 1.7038e-04, 1.4686e-04, 1.9492e-04, 2.2054e-04, 1.5912e-04, 1.5327e-04,\n 3.6605e-04, 1.2268e-04, 1.2557e-04, 1.5061e-04, 1.0913e-04, 2.8510e-04,\n 2.3443e-04, 2.5786e-04, 7.5165e-05, 1.8867e-04, 1.0341e-04, 7.2293e-05,\n 1.5550e-04, 1.7008e-04, 2.7282e-04, 3.4179e-04, 5.2470e-05, 2.9169e-04,\n 1.2613e-04, 7.0636e-04, 8.7767e-05, 7.6629e-05, 1.1408e-04, 2.3363e-04,\n 2.2286e-04, 1.9920e-04, 6.7311e-05, 1.2537e-04, 2.5613e-04, 1.1757e-04,\n 8.6190e-05, 1.3787e-04, 7.0569e-05, 8.5019e-05, 2.5395e-04, 2.1999e-04,\n 1.3997e-04, 1.2824e-04, 2.3120e-04, 2.6346e-04, 1.6609e-04, 1.5159e-04,\n 2.8784e-04, 9.5537e-05, 1.2665e-04, 1.1337e-04, 1.7194e-04, 1.9314e-04,\n 1.7271e-04, 2.1445e-04, 1.5554e-04, 1.1803e-04, 9.4835e-05, 1.2489e-04,\n 9.7879e-05, 1.6447e-04, 5.9460e-05, 1.8565e-04, 1.9044e-04, 1.4530e-04,\n 1.3499e-04, 1.8377e-04, 9.1965e-05, 2.2272e-04, 2.4216e-04, 1.4595e-04,\n 7.3163e-05, 3.2908e-04, 1.0088e-04, 2.3814e-04, 3.2496e-04, 7.9750e-05,\n 1.2595e-04, 2.6875e-04, 2.4396e-04, 1.3370e-04, 2.2907e-04, 8.1449e-05,\n 1.5257e-04, 7.4695e-05, 5.9321e-05, 1.7478e-04, 1.0921e-04, 4.0271e-05,\n 9.1579e-05, 3.6209e-04, 2.1234e-04, 1.2756e-04, 1.6142e-04, 1.3557e-04,\n 1.1898e-04, 1.4819e-04, 1.3604e-04, 3.6208e-04, 6.9492e-05, 3.9082e-04,\n 1.0651e-04, 2.2054e-04, 1.6078e-04, 1.2716e-04, 2.1047e-04, 2.6919e-04,\n 1.9055e-04, 3.3430e-04, 1.0648e-04, 1.3429e-04, 1.8872e-04, 1.7697e-04,\n 1.4761e-04, 4.4019e-04, 9.1675e-05, 1.8625e-04, 1.6765e-04, 1.3441e-04,\n 8.4081e-05, 2.5302e-04, 1.7665e-04, 5.8364e-05, 1.4961e-04, 1.2196e-04,\n 1.7554e-04, 9.9895e-05, 8.6891e-05, 2.9479e-04, 1.0218e-04, 2.0177e-04,\n 9.6523e-05, 1.6396e-04, 1.8132e-04, 1.7522e-04, 9.2247e-05, 1.2239e-04,\n 1.1903e-04, 1.4660e-04, 1.1903e-04, 1.7398e-04, 4.2109e-04, 1.7038e-04,\n 1.4555e-04, 2.9065e-04, 1.5793e-04, 1.7824e-04, 1.4013e-04, 1.4241e-04,\n 9.7378e-05, 1.8195e-04, 1.3112e-04, 8.1108e-05, 8.9771e-05, 1.3310e-04,\n 1.1886e-04, 6.8161e-04, 1.2090e-04, 1.9303e-04, 2.9866e-04, 1.3166e-04,\n 1.5539e-04, 2.4175e-04, 4.0524e-04, 9.9937e-05, 1.0646e-04, 2.1116e-04,\n 1.4154e-04, 1.9992e-04, 1.8720e-04, 1.7202e-04, 1.4326e-04, 2.6748e-04,\n 3.2066e-04, 1.2660e-04, 8.3293e-05, 2.7975e-04, 1.5592e-04, 2.4764e-04,\n 1.1224e-04, 2.4504e-04, 1.7405e-04, 1.4489e-04, 1.7651e-04, 3.3916e-04,\n 2.2134e-04, 2.0007e-04, 1.8501e-04, 4.1757e-04, 1.6057e-04, 1.7545e-04,\n 1.6032e-04, 1.5428e-04, 2.3689e-04, 1.1315e-04, 6.6128e-05, 1.4059e-04,\n 1.7540e-04, 1.2061e-04, 2.4748e-04, 1.7512e-04, 5.8312e-05, 1.1535e-04,\n 8.3178e-05, 2.1024e-04, 2.0771e-04, 6.7370e-05, 7.4603e-05, 9.3156e-05,\n 1.2705e-04, 1.1795e-04, 1.2496e-04, 8.2481e-05, 1.8947e-04, 1.6479e-04,\n 1.2039e-04, 2.1047e-04, 1.1586e-04, 1.5753e-04, 1.2669e-04, 1.0907e-04,\n 7.0165e-05, 3.7207e-05, 2.1764e-04, 7.4360e-05, 1.2548e-04, 2.0052e-04,\n 8.7653e-05, 9.4995e-05, 1.9266e-04, 1.2327e-04, 2.4601e-04, 1.3512e-04,\n 1.4669e-04, 2.0777e-04], device='cuda:0')"
19
  },
20
  "3": {
21
- "step": "tensor(12520.)",
22
- "exp_avg": "tensor([ 4.1373e-03, 2.0046e-03, -2.0093e-03, 6.0270e-04, -4.2223e-03,\n 1.4737e-03, -8.3424e-04, 9.9720e-04, 1.3730e-03, 2.3100e-03,\n -1.5413e-03, 3.5469e-03, -9.4580e-04, -2.4875e-03, -1.5314e-03,\n 1.3928e-03, -5.4596e-03, -9.6337e-04, 2.7346e-03, 2.4711e-03,\n 2.5775e-03, -3.8221e-04, 2.5814e-03, 1.7790e-03, -1.9503e-03,\n 1.3163e-04, 1.7431e-04, -2.8731e-03, -2.3889e-04, -1.5291e-03,\n 4.2299e-04, -5.5862e-05, 5.5053e-04, 2.2228e-03, -1.0593e-03,\n 1.3684e-03, -1.3330e-03, 6.9733e-03, -8.4587e-04, -5.1940e-03,\n -5.5081e-03, -1.6495e-04, -7.0945e-04, -1.2496e-03, -4.8574e-04,\n 2.2032e-03, 2.6144e-03, 1.0518e-03, 4.1722e-04, 1.1655e-03,\n 4.6709e-05, 4.1575e-04, 3.0638e-04, 3.0734e-04, 1.9658e-03,\n 3.8384e-03, 7.7699e-04, 1.5948e-03, -1.2091e-03, 1.6686e-03,\n 1.5183e-03, -4.7683e-04, -4.1525e-04, 3.2692e-03, -2.4660e-04,\n -2.1823e-03, 4.1902e-03, 1.1494e-03, -2.8962e-03, -1.8233e-03,\n 7.8810e-03, 2.8802e-03, -2.3219e-05, -7.2163e-04, 5.2711e-04,\n -2.0807e-03, 2.9093e-03, -2.8891e-04, 8.1500e-04, -1.8922e-03,\n 3.2743e-03, -1.3520e-03, -6.1703e-03, -2.3846e-03, 5.1012e-03,\n -6.7753e-03, -2.0012e-04, -1.3147e-03, 5.6052e-45, -7.7328e-04,\n 1.3411e-03, 1.7005e-03, 1.1283e-03, -3.3912e-03, -8.7438e-04,\n 4.0185e-03, 1.2406e-03, 2.2250e-03, 3.4867e-03, -1.1898e-04,\n -3.9281e-03, 1.7390e-03, 1.3889e-03, 3.9410e-03, 1.2456e-03,\n -4.8506e-04, -1.4997e-04, 9.4303e-05, 6.5614e-04, -3.2496e-03,\n 1.9876e-03, 2.3487e-03, -3.6933e-03, 9.7908e-04, 4.1263e-03,\n 8.8741e-04, -3.6422e-03, -2.4905e-03, 2.2794e-03, 1.6183e-03,\n 3.6154e-05, -3.4959e-03, 9.0872e-04, 3.3306e-03, 3.5380e-03,\n -3.7241e-04, -1.1521e-03, -4.3947e-03, -8.5493e-05, -2.7435e-05,\n -1.4064e-04, -4.2322e-03, 3.0396e-03, -7.7105e-04, -1.2111e-03,\n -9.3919e-04, -1.1787e-03, -1.9423e-03, 1.0555e-03, 1.9220e-03,\n -8.3652e-04, 3.1769e-03, 1.5137e-03, -1.9903e-03, -1.4570e-03,\n 1.5441e-03, 3.2713e-03, 1.6705e-03, 1.7195e-03, 1.9656e-03,\n -4.1300e-04, 1.9559e-03, -5.4321e-03, -1.5669e-03, -2.2334e-03,\n 2.4268e-03, 2.7842e-03, 4.8676e-03, 6.5762e-04, -8.7739e-04,\n -6.4392e-03, -7.7498e-04, -1.7475e-03, 1.0667e-03, -1.3875e-03,\n -5.3889e-04, 7.3358e-04, -1.1321e-03, -3.8308e-04, -9.5364e-04,\n 3.0327e-04, 1.7532e-03, 7.7406e-04, -6.5761e-04, -1.4295e-03,\n 2.8804e-03, -3.4327e-04, 1.2974e-03, 1.2066e-03, 6.5669e-04,\n 2.3948e-03, 1.8412e-03, -1.0351e-02, 3.7319e-03, -1.2403e-03,\n 4.8896e-04, -1.5015e-04, -2.0977e-03, -5.2152e-04, -2.4023e-04,\n 5.4263e-04, -1.2562e-02, 2.0852e-03, 2.6810e-03, 1.6668e-04,\n -9.7694e-04, -1.5307e-03, 9.7498e-04, -5.8696e-05, 3.7297e-04,\n -2.0433e-04, -5.3733e-03, -4.4945e-04, 1.9818e-03, 1.4138e-03,\n 8.7984e-04, 3.6068e-04, 1.0165e-03, -1.0187e-03, 5.3284e-04,\n 1.2509e-03, 1.3534e-03, -2.5382e-03, -4.6449e-03, -1.1655e-03,\n 2.8933e-04, -5.1909e-03, 1.3201e-03, -8.8282e-04, 4.1278e-04,\n -1.8642e-03, -4.7300e-04, 4.0012e-04, 3.6222e-04, 2.4341e-03,\n 7.5427e-04, -1.9912e-03, -3.2787e-03, 1.2163e-03, -4.0500e-04,\n -9.6603e-04, -1.7266e-03, 7.8661e-04, 4.0949e-03, -5.3389e-03,\n 3.5580e-03, 3.0716e-03, 3.2913e-03, 4.7870e-04, 9.9273e-04,\n -3.3544e-03, 1.8581e-03, -9.1055e-04, 4.8808e-04, -4.0986e-04,\n -3.3548e-03, 7.5316e-04, -3.1205e-03, 1.4747e-03, 8.9531e-04,\n 1.1409e-03, -2.4975e-03, 1.9354e-03, 1.7225e-04, 1.4483e-03,\n -8.2022e-04, 1.0391e-03, 2.7731e-03, 8.6482e-04, 1.5326e-03,\n -3.7473e-03, -3.1354e-04, 3.3995e-03, -1.8105e-04, -2.8787e-04,\n -8.1246e-03, -3.2545e-03, 2.0741e-03, -1.9482e-03, -1.4571e-03,\n -6.8159e-04, 7.9098e-04, -2.5212e-03, -1.1660e-03, -6.3629e-04,\n 2.1819e-03, 1.2585e-03, 3.1514e-04, 6.4672e-04, 4.3465e-03,\n 1.0540e-03, -3.3350e-03, 1.9752e-03, -4.5015e-04, 1.2544e-03,\n 3.8538e-03, -1.2945e-03, 2.2922e-03, 1.9641e-03, -1.4458e-03,\n 2.9125e-03, -5.2555e-03, -1.2550e-03, -4.5086e-04, 1.8257e-03,\n 1.6713e-03, -2.5974e-03, -1.5011e-03, 1.2974e-03, -5.5984e-04,\n 7.4047e-04, -4.3050e-03, 2.2720e-03, -1.1104e-04, 4.3742e-04,\n -9.5901e-04, 7.1693e-04, -1.3201e-03, 4.0388e-04, -3.2207e-03,\n -5.4032e-03, 9.7979e-04, -1.6721e-03, -1.9822e-03, -1.7101e-03,\n -1.4230e-03, 1.6750e-03, -2.8188e-03, -1.2123e-03, 8.2985e-04,\n -3.1523e-04, -9.4855e-04, -6.0235e-04, -2.6111e-03, 3.0811e-03,\n -4.4801e-03, -9.1183e-04, -3.7965e-03, -7.7872e-04, 1.7078e-04,\n -8.8201e-04, 1.0211e-03, 1.7733e-03, -2.4294e-03, 3.0546e-04,\n 1.1347e-03, 2.8915e-03, 6.0442e-04, -2.1089e-04, -1.5874e-04,\n -5.0392e-04, 1.5891e-04, 3.3765e-03, 1.1523e-03, 2.3523e-03,\n 7.5852e-05, 2.2568e-03, -1.8872e-03, -5.0721e-03, -5.8292e-04,\n 3.1584e-03, -3.1155e-03, 1.5505e-03, 1.3268e-03, 4.9919e-04,\n 9.5487e-04, -2.1924e-03, -5.9790e-04, -1.1957e-03, 7.5898e-04,\n -9.5407e-04, 4.0625e-04, 7.3509e-04, -7.1221e-04, -7.7495e-04,\n 5.2254e-04, -1.9950e-04, -8.1209e-03, 2.6934e-04, -1.7418e-03,\n -6.0392e-04, 3.3616e-03, -1.0331e-03, -9.3736e-04, 2.6034e-03,\n 1.9261e-03, 2.2705e-03, 4.3738e-03, -3.5279e-03, 1.5247e-03,\n 1.2727e-03, 5.8993e-05, -4.4575e-03, 4.1534e-04, -3.7232e-04,\n -3.6865e-06, 1.8960e-04, -7.5477e-04, 3.0180e-03, -2.6332e-04,\n 2.7126e-03, 3.2287e-03, -5.6548e-03, -8.4408e-04, -7.5098e-04,\n 3.8546e-03, -2.0561e-03, 4.3892e-03, -3.7621e-04, -6.6576e-04,\n 1.6865e-03, 2.4908e-03, 2.0277e-03, 1.8386e-03, -1.8361e-03,\n -4.7032e-03, 2.9726e-03, -1.8173e-03, -2.0728e-03, -3.8943e-04,\n -1.3252e-03, 1.1757e-03, 6.1412e-04, -1.5215e-03, 2.2864e-03,\n -7.2901e-04, -1.1392e-04, -1.9575e-03, 2.4979e-03, 3.3975e-04,\n 6.5262e-04, -2.4489e-03, 1.3481e-03, 2.6548e-03, 1.7274e-05,\n 1.0811e-03, -2.7577e-04, 5.4188e-03, 1.6739e-04, 2.0993e-03,\n -4.7362e-03, -6.7126e-04, -5.8165e-04, 4.8723e-04, -2.2167e-03,\n 3.4872e-04, -1.3695e-03, 4.6983e-04, 5.5412e-04, 5.1113e-04,\n -5.1817e-03, -1.3361e-04, 2.3678e-03, -9.3557e-04, 8.6378e-04,\n 8.7715e-04, -3.7631e-03, 6.0648e-04, 1.5008e-03, 2.2842e-03,\n 7.0679e-04, 1.1943e-03, -1.2411e-03, -8.6336e-05, 2.3234e-03,\n -4.5270e-06, -3.4252e-03, -3.8450e-04, -2.6986e-03, -1.1898e-03,\n 1.0207e-03, 5.0271e-04, 1.5110e-03, -6.0521e-03, 3.2933e-04,\n -3.5102e-03, 3.3018e-05, -1.4676e-04, -1.1211e-03, -1.0126e-03,\n 1.2445e-03, -1.2805e-03, -2.9231e-03, 1.6986e-04, -6.8848e-03,\n -2.9526e-03, -1.4090e-03, -3.0923e-03, 7.3828e-04, -6.2815e-04,\n 1.6140e-03, 5.2493e-04, -2.2259e-03, 2.9834e-04, 1.0423e-03,\n -1.2359e-03, 5.9277e-03, -4.0567e-04, -2.2839e-03, 2.2975e-03,\n 8.5013e-04, 1.2484e-03, 9.4312e-04, -3.0186e-03, -1.1297e-03,\n -6.7248e-03, -4.6395e-04, 2.6604e-03, -9.3869e-05, 2.6552e-03,\n -5.0892e-04, -1.8761e-04, 2.3289e-03, -1.3559e-03, -3.5754e-04,\n -1.7077e-03, 4.8932e-03, 1.9159e-03, -4.0395e-03, 1.0994e-03,\n -7.8532e-04, 4.4951e-03], device='cuda:0')",
23
- "exp_avg_sq": "tensor([3.6805e-05, 7.3937e-05, 5.8542e-05, 4.8088e-05, 4.3990e-05, 4.0886e-05,\n 2.6812e-05, 3.4669e-05, 5.0843e-05, 3.8308e-05, 4.7279e-05, 5.2565e-05,\n 6.5203e-05, 8.4050e-05, 5.7575e-05, 3.8645e-05, 6.9725e-05, 5.1133e-05,\n 6.2768e-05, 5.3528e-05, 5.3246e-05, 4.0388e-05, 5.1934e-05, 2.9991e-05,\n 1.0893e-04, 3.7622e-05, 5.0818e-05, 7.2045e-05, 6.4861e-05, 5.5137e-05,\n 4.9805e-05, 6.3562e-05, 3.0306e-05, 3.3061e-05, 7.6479e-05, 2.9636e-05,\n 6.9661e-05, 7.5459e-05, 2.9736e-05, 5.0717e-05, 3.8693e-05, 4.8324e-05,\n 4.2262e-05, 4.8798e-05, 3.5006e-05, 4.1160e-05, 4.8768e-05, 4.2778e-05,\n 5.9573e-05, 3.2704e-05, 5.4468e-05, 6.7364e-05, 4.3102e-05, 5.5270e-05,\n 4.2642e-05, 4.3430e-05, 3.8092e-05, 4.9398e-05, 5.3931e-05, 7.0820e-05,\n 6.1443e-05, 4.6501e-05, 5.6354e-05, 1.5672e-04, 4.8840e-05, 4.5152e-05,\n 6.3199e-05, 5.4005e-05, 3.1149e-05, 6.2683e-05, 5.1026e-05, 6.7359e-05,\n 5.0650e-05, 4.1484e-05, 3.3168e-05, 9.6477e-05, 4.6257e-05, 8.8566e-05,\n 5.0603e-05, 5.7138e-05, 6.5175e-05, 8.0373e-05, 8.1385e-05, 5.9455e-05,\n 6.2905e-05, 4.8052e-05, 6.2945e-05, 8.1142e-05, 6.4220e-11, 7.1123e-05,\n 4.5701e-05, 9.2766e-05, 4.2083e-05, 4.4945e-05, 8.5200e-05, 4.2624e-05,\n 4.2242e-05, 5.2274e-05, 4.8477e-05, 6.2582e-05, 4.0729e-05, 4.0337e-05,\n 3.6264e-05, 8.7707e-05, 5.6182e-05, 4.7374e-05, 4.4159e-05, 4.1929e-05,\n 3.7908e-05, 5.1894e-05, 4.5529e-05, 3.7234e-05, 5.9186e-05, 5.2459e-05,\n 4.2729e-05, 3.9106e-05, 5.7394e-05, 5.0099e-05, 3.9905e-05, 2.9276e-05,\n 5.7881e-05, 4.7650e-05, 5.6506e-05, 5.1832e-05, 3.9178e-05, 3.9626e-05,\n 3.1517e-05, 3.0676e-05, 4.3402e-05, 4.4518e-05, 3.8888e-05, 6.8498e-05,\n 5.4844e-05, 6.6723e-05, 8.1918e-05, 6.4189e-05, 4.3603e-05, 5.6950e-05,\n 3.8120e-05, 4.8239e-05, 5.7051e-05, 4.6541e-05, 5.7968e-05, 5.7197e-05,\n 5.3428e-05, 6.0866e-05, 7.3385e-05, 5.3849e-05, 4.7500e-05, 6.7541e-05,\n 3.5680e-05, 4.1777e-05, 4.6011e-05, 4.2314e-05, 4.3536e-05, 4.8088e-05,\n 5.4245e-05, 2.9563e-05, 6.1623e-05, 4.8354e-05, 4.7403e-05, 3.3478e-05,\n 4.7706e-05, 5.0560e-05, 2.5178e-05, 5.8178e-05, 4.8551e-05, 6.4073e-05,\n 4.9725e-05, 6.1510e-05, 4.1554e-05, 6.2202e-05, 5.8705e-05, 6.1497e-05,\n 8.6059e-05, 4.6032e-05, 5.1410e-05, 6.2621e-05, 4.8253e-05, 8.4380e-05,\n 7.0384e-05, 6.1315e-05, 5.5112e-05, 1.0852e-04, 7.6373e-05, 3.4335e-05,\n 6.9645e-05, 8.8747e-05, 4.7956e-05, 6.3612e-05, 4.5646e-05, 9.6595e-05,\n 6.8487e-05, 3.4970e-05, 5.8962e-05, 3.7908e-05, 4.8100e-05, 3.3616e-05,\n 4.5076e-05, 3.2201e-05, 6.0276e-05, 5.6248e-05, 5.7944e-05, 5.7331e-05,\n 4.0503e-05, 5.3161e-05, 3.5984e-05, 3.6681e-05, 3.2959e-05, 5.6431e-05,\n 3.8415e-05, 6.7970e-05, 7.1759e-05, 4.2055e-05, 7.3042e-05, 4.5809e-05,\n 4.5942e-05, 5.8677e-05, 6.2726e-05, 5.6845e-05, 6.5308e-05, 8.0984e-05,\n 6.9033e-05, 3.9029e-05, 5.2816e-05, 3.6832e-05, 7.3528e-05, 3.9509e-05,\n 4.5968e-05, 4.2868e-05, 2.6907e-05, 4.7027e-05, 5.4151e-05, 5.9676e-05,\n 6.3634e-05, 4.2313e-05, 8.9617e-05, 8.6283e-05, 3.4269e-05, 3.8427e-05,\n 5.9826e-05, 3.3533e-05, 3.3339e-05, 5.1069e-05, 3.5340e-05, 4.9429e-05,\n 3.5866e-05, 4.8919e-05, 6.0242e-05, 4.0861e-05, 5.5402e-05, 4.6662e-05,\n 6.1038e-05, 4.1311e-05, 4.6646e-05, 4.1644e-05, 3.2148e-05, 7.7027e-05,\n 3.5975e-05, 4.7480e-05, 4.9559e-05, 6.6777e-05, 6.3208e-05, 3.8730e-05,\n 4.1173e-05, 7.3003e-05, 4.1849e-05, 3.8839e-05, 3.9126e-05, 6.9553e-05,\n 4.0585e-05, 2.9138e-05, 6.2635e-05, 3.7909e-05, 4.0113e-05, 7.8943e-05,\n 5.4639e-05, 3.9306e-05, 6.6166e-05, 5.5038e-05, 5.9273e-05, 5.1681e-05,\n 9.0506e-05, 6.3974e-05, 4.2866e-05, 6.5564e-05, 4.0217e-05, 6.9711e-05,\n 6.2263e-05, 4.4330e-05, 3.6692e-05, 1.0448e-04, 4.6788e-05, 3.8016e-05,\n 4.7864e-05, 4.4140e-05, 9.7484e-05, 1.0209e-04, 2.2831e-05, 6.1538e-05,\n 3.8199e-05, 8.3140e-05, 3.7347e-05, 3.9100e-05, 5.4662e-05, 7.6378e-05,\n 6.5059e-05, 4.4603e-05, 4.1620e-05, 4.7557e-05, 5.5709e-05, 4.5426e-05,\n 3.7046e-05, 4.4605e-05, 2.6834e-05, 3.0995e-05, 6.3699e-05, 5.4689e-05,\n 4.8254e-05, 4.9489e-05, 5.9861e-05, 8.7019e-05, 8.2105e-05, 7.5413e-05,\n 6.8942e-05, 3.4976e-05, 4.0872e-05, 4.8758e-05, 4.2230e-05, 5.7723e-05,\n 5.3527e-05, 7.1423e-05, 5.4537e-05, 4.8284e-05, 3.0305e-05, 4.6845e-05,\n 4.7584e-05, 5.4110e-05, 4.3925e-05, 5.6366e-05, 5.5120e-05, 5.4311e-05,\n 4.3438e-05, 6.1051e-05, 4.8467e-05, 8.8103e-05, 6.8654e-05, 5.1174e-05,\n 3.7494e-05, 7.2669e-05, 4.0038e-05, 6.6344e-05, 5.6343e-05, 4.5152e-05,\n 4.7583e-05, 7.0478e-05, 5.1679e-05, 5.4520e-05, 6.6571e-05, 3.1084e-05,\n 6.3296e-05, 2.6308e-05, 3.3439e-05, 5.3752e-05, 5.2618e-05, 2.7523e-05,\n 3.7038e-05, 8.8722e-05, 7.1056e-05, 3.8261e-05, 8.3993e-05, 4.2115e-05,\n 4.8095e-05, 2.9971e-05, 5.9695e-05, 1.2726e-04, 3.6123e-05, 8.2811e-05,\n 4.8126e-05, 4.2755e-05, 5.5365e-05, 4.5476e-05, 1.2404e-04, 6.6355e-05,\n 6.3814e-05, 7.2946e-05, 3.1705e-05, 6.6186e-05, 5.8457e-05, 8.2941e-05,\n 6.4748e-05, 6.5316e-05, 3.9171e-05, 4.4058e-05, 5.1926e-05, 4.7984e-05,\n 3.8446e-05, 5.4465e-05, 5.1647e-05, 3.1666e-05, 7.6487e-05, 6.4791e-05,\n 5.2313e-05, 3.0687e-05, 3.3275e-05, 9.8068e-05, 4.6403e-05, 4.3078e-05,\n 4.8287e-05, 4.6822e-05, 5.4165e-05, 5.4477e-05, 4.7440e-05, 4.6478e-05,\n 5.7160e-05, 3.8572e-05, 5.0871e-05, 7.1905e-05, 6.5379e-05, 8.3975e-05,\n 4.8659e-05, 7.8890e-05, 5.9239e-05, 5.5878e-05, 5.8704e-05, 5.9433e-05,\n 4.2919e-05, 5.8995e-05, 4.5554e-05, 4.7780e-05, 3.8061e-05, 4.5313e-05,\n 3.7787e-05, 9.1572e-05, 4.3879e-05, 5.7953e-05, 8.3205e-05, 5.5552e-05,\n 5.6977e-05, 6.7190e-05, 1.2310e-04, 4.0099e-05, 4.8564e-05, 5.5910e-05,\n 4.8675e-05, 5.7628e-05, 5.4614e-05, 4.3573e-05, 5.9665e-05, 6.0839e-05,\n 8.8962e-05, 4.6997e-05, 4.5268e-05, 5.6266e-05, 5.0640e-05, 6.1417e-05,\n 3.4373e-05, 5.3757e-05, 5.2571e-05, 4.0398e-05, 6.0936e-05, 6.6020e-05,\n 5.2451e-05, 5.8451e-05, 7.0247e-05, 8.8714e-05, 6.4880e-05, 7.7108e-05,\n 6.8827e-05, 4.7016e-05, 9.8448e-05, 3.1812e-05, 2.9540e-05, 5.3065e-05,\n 5.1030e-05, 3.9378e-05, 4.2668e-05, 4.9246e-05, 3.8719e-05, 4.8421e-05,\n 4.6622e-05, 8.3083e-05, 6.2717e-05, 3.0664e-05, 3.8283e-05, 5.1384e-05,\n 6.8495e-05, 4.8804e-05, 5.5397e-05, 2.6952e-05, 6.3484e-05, 5.1081e-05,\n 4.0286e-05, 6.1025e-05, 4.4607e-05, 7.0251e-05, 3.9424e-05, 3.7349e-05,\n 3.9142e-05, 1.7464e-05, 6.8029e-05, 4.3056e-05, 3.9677e-05, 5.1420e-05,\n 3.7806e-05, 4.3370e-05, 5.8078e-05, 4.1708e-05, 8.6987e-05, 4.2758e-05,\n 5.5073e-05, 5.0651e-05], device='cuda:0')"
24
  },
25
  "4": {
26
- "step": "tensor(12520.)",
27
- "exp_avg": "tensor([[-5.9020e-06, 1.8948e-06, 1.6365e-05, ..., 3.1623e-05,\n -3.9503e-06, 1.3279e-06],\n [ 8.9468e-06, 3.4703e-05, 9.5180e-06, ..., 5.4239e-05,\n 1.9834e-05, 3.0988e-05],\n [-1.1299e-05, -1.0186e-05, -1.4260e-05, ..., -1.5816e-05,\n -6.0828e-06, 1.3052e-05],\n ...,\n [ 9.0819e-06, -2.3612e-05, 3.1079e-05, ..., 1.8375e-05,\n 2.3136e-07, 3.0862e-06],\n [ 2.7104e-06, -2.3923e-06, 1.6058e-05, ..., -1.9826e-05,\n -1.3109e-05, 1.1551e-05],\n [ 2.2167e-06, -1.8830e-05, -4.2202e-05, ..., -1.0402e-05,\n -2.1073e-05, -8.1033e-06]], device='cuda:0')",
28
- "exp_avg_sq": "tensor([[2.0372e-09, 4.1398e-09, 7.6519e-09, ..., 3.2138e-09, 3.1756e-09,\n 6.5789e-09],\n [4.5471e-09, 1.1133e-08, 5.7402e-09, ..., 4.9883e-09, 3.9634e-09,\n 6.0490e-09],\n [6.5539e-09, 1.0672e-08, 4.5197e-09, ..., 7.3073e-09, 2.8495e-09,\n 1.2085e-08],\n ...,\n [4.5090e-09, 8.3606e-09, 6.3867e-09, ..., 6.4894e-09, 3.2104e-09,\n 9.0154e-09],\n [5.1814e-09, 8.1224e-09, 5.4082e-09, ..., 5.9186e-09, 7.4052e-09,\n 1.1339e-08],\n [5.1179e-09, 6.1387e-09, 8.3817e-09, ..., 7.3411e-09, 4.1967e-09,\n 1.0435e-08]], device='cuda:0')"
29
- },
30
- "5": {
31
- "step": "tensor(12520.)",
32
- "exp_avg": "tensor([[ 1.7989e-06, 4.2190e-06, 6.5448e-06, ..., 3.0508e-06,\n -1.8514e-06, 5.9576e-06],\n [ 6.3176e-06, 1.6977e-05, 1.1505e-05, ..., 3.7173e-05,\n 4.9809e-06, 1.5525e-05],\n [-1.6708e-05, 9.5249e-06, -1.1249e-05, ..., -1.5451e-05,\n -7.9721e-06, 1.9833e-05],\n ...,\n [ 1.0364e-05, 1.1385e-05, -1.1192e-05, ..., -8.4844e-06,\n -5.6640e-06, -1.1401e-05],\n [-1.7906e-06, 1.9834e-06, 1.0815e-06, ..., -1.2380e-05,\n -6.2534e-06, -9.2382e-06],\n [ 9.6819e-06, -7.3784e-06, 4.8784e-06, ..., -1.4418e-05,\n -9.4953e-06, -7.2564e-06]], device='cuda:0')",
33
- "exp_avg_sq": "tensor([[8.1560e-10, 1.3475e-09, 1.6525e-09, ..., 1.2488e-09, 5.9595e-10,\n 2.3552e-09],\n [1.7554e-09, 4.1794e-09, 1.5715e-09, ..., 1.7226e-09, 1.9271e-09,\n 2.3219e-09],\n [2.2232e-09, 4.9447e-09, 1.8756e-09, ..., 2.8584e-09, 1.2778e-09,\n 3.8880e-09],\n ...,\n [2.5199e-09, 3.9330e-09, 2.5599e-09, ..., 1.6822e-09, 1.6663e-09,\n 1.8369e-09],\n [2.6684e-09, 7.1982e-09, 2.0619e-09, ..., 2.2440e-09, 1.4449e-09,\n 2.9132e-09],\n [1.2986e-09, 3.4277e-09, 1.6512e-09, ..., 2.6567e-09, 1.1022e-09,\n 3.1725e-09]], device='cuda:0')"
34
- },
35
- "6": {
36
- "step": "tensor(12520.)",
37
- "exp_avg": "tensor([ 0.0002, -0.0002], device='cuda:0')",
38
- "exp_avg_sq": "tensor([5.9675e-06, 5.9675e-06], device='cuda:0')"
39
  }
40
  },
41
  "param_groups": [
42
  {
43
- "lr": 0.01,
44
  "name": "shared",
45
  "betas": [
46
  0.9,
@@ -55,16 +45,14 @@
55
  "differentiable": false,
56
  "fused": null,
57
  "decoupled_weight_decay": true,
58
- "initial_lr": 0.01,
59
  "params": [
60
  0,
61
- 1,
62
- 2,
63
- 3
64
  ]
65
  },
66
  {
67
- "lr": 0.01,
68
  "name": "scale_256",
69
  "betas": [
70
  0.9,
@@ -79,13 +67,15 @@
79
  "differentiable": false,
80
  "fused": null,
81
  "decoupled_weight_decay": true,
82
- "initial_lr": 0.01,
83
  "params": [
 
 
84
  4
85
  ]
86
  },
87
  {
88
- "lr": 0.01,
89
  "name": "scale_512",
90
  "betas": [
91
  0.9,
@@ -100,13 +90,199 @@
100
  "differentiable": false,
101
  "fused": null,
102
  "decoupled_weight_decay": true,
103
- "initial_lr": 0.01,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  "params": [
105
- 5
 
 
106
  ]
107
  },
108
  {
109
- "lr": 0.005,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  "name": "fusion",
111
  "betas": [
112
  0.9,
@@ -121,60 +297,115 @@
121
  "differentiable": false,
122
  "fused": null,
123
  "decoupled_weight_decay": true,
124
- "initial_lr": 0.005,
125
  "params": [
126
- 6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  ]
128
  }
129
  ]
130
  },
131
  "scheduler_state_dict": {
132
  "T_0": 10,
133
- "T_i": 20,
134
  "T_mult": 2,
135
  "eta_min": 1e-06,
136
- "T_cur": 0,
137
  "base_lrs": [
138
- 0.01,
139
- 0.01,
140
- 0.01,
141
- 0.005
 
 
 
 
 
 
 
 
142
  ],
143
- "last_epoch": 10,
144
  "_step_count": 0,
145
  "_is_initial": false,
146
  "_get_lr_called_within_step": false,
147
  "_last_lr": [
148
- 0.01,
149
- 0.01,
150
- 0.01,
151
- 0.005
 
 
 
 
 
 
 
 
152
  ]
153
  },
154
  "metrics": {
155
- "best_val_acc": 75.38,
156
- "best_epoch": 9,
157
  "scale_accuracies": {
158
- "256": 75.102,
159
- "512": 75.302
160
  }
161
  },
162
  "train_config": {
163
  "name": "david_training",
164
  "run_id": "20251012_060013",
165
  "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
166
- "model_variant": "clip_vit_b16",
167
  "num_classes": 1000,
168
- "preset": "small_fast",
169
  "custom_config_path": null,
170
  "num_classes_override": null,
171
  "use_belly_override": null,
172
  "belly_expand_override": null,
173
- "progressive_training_override": false,
174
- "scale_warmup_epochs_override": null,
 
 
 
 
 
 
 
 
 
 
 
175
  "num_epochs": 10,
176
  "batch_size": 1024,
177
- "learning_rate": 0.01,
178
  "weight_decay": 1e-05,
179
  "warmup_epochs": 0,
180
  "use_rose_loss": true,
 
1
  {
2
+ "epoch": 0,
3
  "optimizer_state_dict": {
4
  "state": {
5
  "0": {
6
+ "step": "tensor(1252.)",
7
+ "exp_avg": "tensor([[ 7.6931e-04, -3.5975e-04, -2.7454e-04, ..., 3.4282e-04,\n 1.6405e-04, -2.7417e-04],\n [-1.1996e-04, -1.1731e-04, -9.9866e-05, ..., 1.6243e-04,\n 2.2704e-04, 5.8436e-05],\n [-2.3594e-05, -1.2594e-04, 1.5107e-04, ..., -3.0109e-04,\n -7.9584e-05, 6.9697e-05],\n ...,\n [-1.2359e-04, -1.4653e-05, 2.1027e-04, ..., -1.2977e-04,\n 1.0232e-04, 1.3696e-04],\n [ 1.5723e-04, 9.8239e-05, -1.2136e-04, ..., 1.6422e-04,\n 1.6617e-04, -3.1527e-04],\n [ 3.0708e-04, -7.8846e-04, 5.5491e-05, ..., -8.6530e-05,\n 2.0349e-04, -1.3880e-04]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[7.4118e-07, 1.0833e-06, 5.3915e-07, ..., 5.2782e-07, 4.1944e-07,\n 3.6632e-07],\n [3.1076e-07, 3.0220e-07, 3.1742e-07, ..., 2.1258e-07, 1.9540e-07,\n 1.6665e-07],\n [6.6361e-07, 7.0572e-07, 4.0617e-07, ..., 4.1036e-07, 3.5275e-07,\n 2.9789e-07],\n ...,\n [4.7457e-07, 3.5343e-07, 3.7321e-07, ..., 3.1882e-07, 2.7955e-07,\n 2.5810e-07],\n [5.7871e-07, 5.5442e-07, 4.4099e-07, ..., 3.3348e-07, 3.2790e-07,\n 2.9927e-07],\n [5.5174e-07, 5.0187e-07, 2.8968e-07, ..., 3.2638e-07, 3.1291e-07,\n 2.3588e-07]], device='cuda:0')"
9
  },
10
  "1": {
11
+ "step": "tensor(1252.)",
12
+ "exp_avg": "tensor([ 0.0155, -0.0063, -0.0059, ..., -0.0034, 0.0076, 0.0054],\n device='cuda:0')",
13
+ "exp_avg_sq": "tensor([0.0012, 0.0005, 0.0010, ..., 0.0009, 0.0010, 0.0007], device='cuda:0')"
14
  },
15
  "2": {
16
+ "step": "tensor(1252.)",
17
+ "exp_avg": "tensor([[ 1.3985e-05, -5.8933e-05, 1.0128e-04, ..., 2.2678e-04,\n -1.4801e-04, -2.0768e-05],\n [ 4.3151e-04, 1.1516e-04, -8.9230e-05, ..., 4.8063e-05,\n -2.3465e-04, 4.5604e-06],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -3.1455e-32,\n 9.5699e-32, -5.6052e-45],\n ...,\n [ 6.3424e-09, -1.6865e-05, 2.2038e-07, ..., -4.1170e-07,\n -9.3469e-06, -1.4808e-07],\n [ 1.1432e-05, 3.8929e-05, -3.2332e-05, ..., 7.8134e-07,\n 4.3614e-05, -1.0174e-05],\n [-9.4698e-07, -2.8017e-07, -1.3962e-07, ..., -1.0875e-04,\n -4.0648e-07, -4.5041e-06]], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([[1.0298e-06, 4.7161e-07, 2.3258e-07, ..., 1.3600e-06, 7.8774e-07,\n 6.1178e-07],\n [1.9419e-06, 8.5299e-07, 6.2341e-07, ..., 1.7004e-06, 8.0653e-07,\n 6.4486e-07],\n [1.7955e-10, 7.0238e-10, 3.7342e-11, ..., 5.1371e-10, 1.6562e-10,\n 1.7251e-10],\n ...,\n [7.4679e-11, 4.4698e-09, 6.7743e-11, ..., 3.5092e-10, 5.4696e-09,\n 2.4203e-10],\n [1.2516e-09, 6.6520e-08, 8.9164e-09, ..., 7.1810e-09, 1.6729e-08,\n 4.2439e-09],\n [1.1515e-08, 7.9548e-09, 6.6302e-09, ..., 7.8305e-07, 3.6041e-09,\n 6.1598e-08]], device='cuda:0')"
19
  },
20
  "3": {
21
+ "step": "tensor(1252.)",
22
+ "exp_avg": "tensor([ 2.3875e-02, -4.3837e-02, 9.6032e-26, 4.2214e-02, 2.2210e-02,\n 3.3747e-02, 1.7797e-02, -7.0004e-03, -2.4879e-02, 2.7619e-02,\n -7.5551e-03, 1.7134e-02, 2.0256e-02, -2.3314e-03, -7.4220e-02,\n -4.7382e-03, 1.4119e-02, -1.1545e-02, -8.9664e-03, 2.9760e-02,\n 2.9607e-03, -2.0514e-02, -7.0222e-03, -4.5160e-02, -2.3961e-02,\n 2.8237e-04, -1.7209e-02, 2.0788e-02, -7.2040e-03, 3.1665e-02,\n 5.6030e-03, 3.4553e-02, 3.6798e-02, -7.2686e-03, 3.0085e-02,\n 7.4137e-03, -6.1774e-03, 5.0785e-02, 4.7387e-03, 3.7325e-02,\n 2.7934e-02, -3.6341e-02, -2.1846e-02, 5.6052e-45, -2.4211e-02,\n 2.2644e-03, 5.5101e-02, 1.4802e-02, -5.4617e-03, -1.9478e-02,\n -6.5484e-03, -1.6414e-02, -4.0664e-04, 4.5538e-02, 5.8758e-03,\n 2.9059e-02, -1.2425e-02, 3.5538e-03, 4.1299e-03, 9.5794e-03,\n -4.0616e-02, 4.8972e-03, 2.5462e-02, 7.2303e-02, 1.5787e-03,\n 4.5761e-02, 1.9496e-02, 4.2323e-02, 3.9632e-02, -1.0082e-03,\n 5.3938e-03, 3.3493e-05, -1.3032e-02, -3.8153e-02, 1.5953e-02,\n 4.4613e-06, 7.0803e-03, -3.0465e-04, -9.4197e-03, -2.4202e-02,\n 9.6786e-02, -7.4001e-03, -2.9757e-02, 2.0539e-02, -2.8599e-02,\n 2.9271e-02, -1.7663e-03, 7.1197e-03, 2.4915e-02, 1.2627e-32,\n -3.4740e-02, 8.6782e-03, -2.3629e-09, 1.6301e-29, 1.5997e-02,\n 4.4684e-02, 3.8713e-04, -1.4058e-02, 2.3174e-02, -1.3123e-02,\n 9.1376e-03, -1.8078e-02, 1.6822e-02, 1.1673e-05, 1.4093e-02,\n 3.1474e-02, -1.2471e-02, -3.0086e-02, -2.3609e-02, 7.3296e-03,\n 5.4473e-36, 1.4037e-02, -1.1540e-02, 3.6438e-03, 2.6305e-03,\n -1.2727e-02, 2.7998e-03, -1.4901e-02, 2.1351e-02, -4.3767e-15,\n 1.8248e-31, 6.4915e-02, 1.7753e-02, 4.6665e-02, 1.1710e-02,\n -1.0307e-02, 2.7988e-02, 2.5342e-02, 2.9874e-02, -3.5108e-02,\n -2.3540e-03, -6.9490e-02, -7.4078e-03, 2.1446e-02, -1.5282e-02,\n -9.1815e-03, -5.9992e-03, -1.6854e-02, -1.2549e-02, -2.3482e-02,\n -2.0157e-02, 2.2761e-19, 5.0847e-02, 2.5370e-02, -9.5745e-03,\n 2.6735e-02, 6.3855e-03, 2.5707e-02, -3.6340e-02, -5.3170e-02,\n 2.5905e-02, -2.7917e-04, 5.1961e-02, -8.5205e-03, -2.0016e-02,\n 7.9647e-03, 2.6824e-28, -1.5696e-02, 1.9396e-02, 9.1115e-03,\n -5.6675e-02, 3.7877e-02, -2.4820e-03, 3.0127e-02, 3.7650e-02,\n 1.3167e-02, -6.0690e-02, 1.5634e-03, 2.0013e-02, -2.8012e-03,\n 2.7729e-02, 6.1169e-03, 7.8048e-04, 5.2743e-03, -3.7234e-02,\n -6.2202e-03, -4.8082e-03, 3.5392e-02, 2.1980e-02, 6.6747e-03,\n -5.1266e-04, 4.6985e-02, 5.6052e-45, 7.1385e-05, 3.2047e-03,\n -3.6594e-02, 1.8168e-02, -6.2917e-02, -3.2929e-02, -5.6694e-03,\n 1.9831e-02, -8.2062e-04, 1.5245e-02, -3.5059e-02, -7.1714e-03,\n 3.2220e-03, -2.1203e-04, -6.0919e-03, 1.4150e-02, -1.7816e-02,\n -1.6064e-02, 1.0769e-02, -2.6079e-02, -9.8037e-03, -3.2816e-02,\n -5.2574e-02, -4.7450e-03, 1.9516e-02, 3.4608e-03, 1.7244e-02,\n 2.6142e-13, 6.6794e-04, 2.7124e-02, -3.4816e-02, -3.1702e-02,\n 3.3221e-03, 3.8579e-02, -3.2285e-02, 1.9722e-02, 1.8849e-02,\n -3.3065e-02, 5.6052e-45, -7.1891e-03, -2.5881e-02, 6.2611e-03,\n -1.4979e-02, -6.4840e-03, 3.5860e-02, -2.7866e-02, -1.3252e-02,\n 2.1320e-02, 1.7814e-02, -3.2913e-02, -2.2322e-02, 2.0048e-03,\n 9.2803e-03, -8.2572e-03, -5.2975e-03, 7.6453e-02, 4.8804e-02,\n -2.1919e-02, 5.6761e-03, 6.3496e-02, -2.8889e-02, 1.3841e-02,\n 8.5695e-03, -1.5521e-02, 4.9531e-02, -4.1589e-02, -1.9676e-02,\n -5.5857e-03, -2.3588e-03, -1.4790e-02, -1.3780e-03, -6.3026e-03,\n -1.0188e-02, -3.3702e-02, 1.9226e-42, 2.2306e-02, 2.6239e-02,\n 4.8861e-26, -5.2627e-03, 2.5603e-06, 7.0623e-03, -4.3391e-03,\n 6.8443e-03, -1.6604e-02, -1.0740e-02, -1.5329e-02, 2.6949e-03,\n -8.0152e-03, -5.2252e-03, 8.3405e-03, 4.8676e-03, 2.9752e-02,\n 2.1431e-02, 5.0216e-02, 2.7263e-02, 8.0592e-03, 8.6727e-35,\n -2.2584e-02, -2.4758e-02, -2.9040e-02, -1.3793e-02, 1.8160e-03,\n -6.5728e-04, -4.3327e-02, 5.8940e-03, -1.1455e-02, -1.5254e-02,\n -1.3119e-02, -2.3658e-02, -6.1895e-03, 2.2715e-02, 2.3466e-02,\n 9.7893e-03, 3.0946e-02, -5.9481e-02, 6.9836e-09, 3.0786e-02,\n 2.9153e-02, -2.5735e-02, -7.1205e-03, -2.9654e-03, 3.6341e-02,\n -1.8624e-02, 2.3698e-02, -4.9125e-03, 1.7921e-02, -4.5526e-03,\n 2.6053e-02, -3.2306e-02, 7.3569e-02, -2.9119e-02, -8.4770e-03,\n 2.6320e-02, -3.1927e-02, 8.3118e-03, 2.1609e-02, 8.7364e-03,\n -6.7107e-03, 1.2142e-02, 8.1370e-03, -3.0566e-02, -1.4636e-02,\n 1.1678e-30, 6.2592e-05, 7.6997e-03, -5.0519e-02, 5.8788e-03,\n 3.3712e-02, 1.9211e-11, -2.0315e-02, 1.6669e-02, -4.9647e-03,\n -8.5470e-03, -5.1239e-03, 2.8877e-14, 8.1417e-08, -3.9018e-03,\n -2.0326e-02, -1.0452e-02, 4.1878e-03, -1.8927e-02, -4.9301e-02,\n -2.3229e-02, 1.7848e-03, 8.9783e-03, -1.2318e-02, 5.6052e-45,\n -1.9077e-02, 1.0073e-02, 1.1951e-02, -4.6252e-04, 5.6052e-45,\n -8.4038e-03, -4.9877e-03, 2.4088e-02, -1.3139e-02, -3.8403e-03,\n 2.5313e-03, -2.6408e-02, 7.3548e-04, -9.5045e-04, 3.6078e-03,\n 8.7422e-03, -1.8084e-02, -1.2749e-02, -1.7397e-04, -4.0832e-02,\n 1.0829e-02, -3.6196e-02, 6.3083e-23, 3.7562e-27, 3.7017e-03,\n 2.1969e-03, -1.5024e-02, -2.7693e-09, 2.8061e-03, -8.4934e-03,\n -1.6902e-02, 2.5158e-04, -9.0583e-03, -7.4322e-03, 2.2614e-02,\n 1.1432e-02, 7.6335e-03, 3.2598e-02, -8.4487e-03, 6.1142e-03,\n -4.9562e-03, 3.3930e-03, 2.8782e-02, -1.7597e-02, 8.4994e-04,\n 1.8249e-02, -2.3071e-02, 5.7733e-32, 8.4071e-30, -6.5391e-03,\n 2.4742e-02, -1.5463e-02, -3.7512e-02, 1.4492e-03, 1.0992e-03,\n -4.5201e-02, -1.0661e-02, 5.9124e-03, -4.8737e-04, 2.2978e-33,\n -2.1474e-05, -3.3565e-02, 1.7868e-02, -3.2502e-04, 1.2170e-05,\n 3.8928e-03, 9.7415e-03, 2.9308e-02, 1.1940e-02, 1.9191e-02,\n -8.1647e-03, 3.0442e-03, -3.8888e-03, -2.5077e-02, 1.2250e-02,\n -1.6413e-03, -2.5531e-02, -4.8607e-03, 2.5237e-02, 4.8066e-02,\n 7.1922e-03, -1.8733e-02, 1.1283e-02, 1.6394e-06, 4.4103e-03,\n 1.4819e-02, -8.5443e-03, -5.3989e-02, -1.8563e-02, -4.3936e-02,\n -1.3040e-02, 7.8469e-03, -6.3018e-02, 6.5663e-03, 6.0159e-03,\n 8.6966e-03, -7.3879e-03, -1.2783e-02, -3.4078e-02, 1.4064e-02,\n -2.5382e-02, -1.6579e-04, 2.1868e-02, -3.5772e-02, -6.2800e-03,\n 7.7235e-04, 1.8875e-03, -3.4342e-02, 2.7571e-02, -1.1460e-02,\n 3.1115e-02, -4.2305e-03, 7.3832e-40, -8.5955e-04, -1.5269e-02,\n 2.9632e-02, 5.7214e-02, 6.3015e-02, -3.4968e-02, 2.6324e-02,\n 1.2256e-02, 8.9374e-13, 3.2504e-02, -3.6663e-02, 2.8071e-19,\n 8.0190e-02, -5.5870e-03, -3.0679e-03, 2.4492e-03, 3.3198e-02,\n -2.3608e-03, 5.6052e-45, 1.9051e-02, 5.2085e-02, 1.8931e-02,\n 6.1182e-03, -6.7992e-02, 5.6052e-45, 5.1587e-03, 3.5890e-02,\n 2.1678e-02, -1.4406e-02, -5.4601e-03, 1.9144e-02, 1.2888e-02,\n 5.7364e-05, 1.1321e-02, -3.3445e-02, -1.4640e-04, 3.2436e-07,\n -2.9884e-03, -4.3738e-02, 1.6330e-17, -4.8169e-02, 9.6817e-03,\n 2.2606e-25, 3.1364e-02, 5.9806e-04, -2.9483e-02, -9.8904e-04,\n 2.6328e-03, 8.8098e-03], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([1.8764e-02, 1.8272e-02, 2.1116e-06, 1.3778e-02, 3.6853e-03, 6.6573e-03,\n 1.8188e-02, 1.7011e-02, 1.8539e-02, 1.0614e-02, 3.9648e-03, 2.1009e-02,\n 3.1433e-03, 5.8576e-03, 1.6608e-02, 7.1616e-05, 4.8830e-03, 1.9907e-02,\n 6.7086e-03, 3.3539e-03, 4.3796e-03, 1.7794e-02, 5.7634e-03, 1.8685e-02,\n 1.5659e-02, 2.9941e-04, 1.5636e-02, 1.4540e-02, 1.8737e-02, 1.6553e-02,\n 1.5784e-02, 1.6078e-02, 1.6327e-02, 7.0099e-03, 1.0621e-02, 1.5467e-02,\n 1.3924e-03, 1.5506e-02, 1.8211e-02, 1.6032e-02, 1.1937e-02, 1.1553e-02,\n 4.5409e-04, 1.0012e-08, 1.4126e-02, 1.8995e-03, 2.2158e-02, 1.6567e-02,\n 1.8036e-02, 1.7208e-02, 4.1166e-04, 1.7573e-02, 3.8096e-03, 1.2772e-02,\n 2.0461e-02, 1.2595e-02, 2.1758e-02, 2.0982e-02, 1.5789e-02, 7.3397e-03,\n 1.5701e-02, 1.0106e-02, 1.5815e-02, 1.6533e-02, 2.0341e-02, 1.5778e-02,\n 5.0521e-03, 1.5900e-02, 2.2204e-02, 1.7102e-03, 1.6514e-02, 1.9365e-06,\n 8.2713e-03, 1.6110e-02, 1.5514e-02, 9.3026e-08, 1.3545e-02, 1.7353e-02,\n 1.5866e-02, 1.7169e-02, 1.9033e-02, 1.5958e-02, 8.2837e-03, 1.6548e-02,\n 2.2114e-02, 8.9850e-03, 1.6406e-02, 1.7232e-03, 1.2786e-02, 3.1307e-07,\n 4.9854e-03, 4.0865e-03, 3.4394e-07, 5.4478e-07, 1.4107e-02, 1.9926e-02,\n 1.0058e-04, 1.7903e-02, 1.6516e-02, 6.1304e-03, 4.3734e-03, 2.0879e-03,\n 3.7778e-03, 4.0758e-06, 1.6172e-02, 1.1247e-02, 1.8040e-02, 1.0636e-02,\n 1.0308e-02, 2.7883e-03, 5.5917e-06, 1.4616e-02, 6.2573e-03, 1.4898e-03,\n 1.7158e-02, 1.9321e-02, 1.9991e-02, 1.6465e-02, 8.9090e-03, 2.0490e-06,\n 1.1226e-07, 1.4761e-02, 8.8807e-03, 1.9667e-02, 1.7528e-02, 6.0540e-03,\n 1.6431e-02, 1.7108e-02, 1.7352e-02, 1.5799e-02, 1.6022e-02, 1.8787e-02,\n 3.5264e-03, 1.8629e-02, 1.8250e-02, 1.5643e-02, 1.5740e-02, 7.6894e-03,\n 1.5372e-02, 1.8558e-02, 2.0713e-02, 4.3730e-05, 2.0132e-02, 9.0030e-03,\n 1.6618e-02, 1.6743e-02, 1.6162e-02, 1.8333e-02, 1.7295e-02, 1.8789e-02,\n 1.4703e-02, 5.6712e-05, 1.4897e-02, 8.1449e-03, 2.0252e-02, 1.8296e-02,\n 5.1933e-08, 1.6048e-02, 9.2359e-04, 7.3904e-05, 1.8577e-02, 1.9959e-02,\n 1.3895e-02, 1.3569e-02, 1.6303e-02, 1.8068e-02, 1.5074e-02, 2.5697e-03,\n 1.5315e-02, 1.4183e-02, 1.5812e-02, 1.2953e-02, 6.2200e-03, 1.9376e-02,\n 1.8279e-02, 8.9495e-04, 1.8078e-02, 1.3957e-02, 1.7215e-02, 7.2711e-03,\n 1.5375e-02, 1.7679e-02, 3.1085e-06, 1.3844e-03, 1.1514e-02, 2.0096e-02,\n 1.6912e-02, 1.6843e-02, 5.8150e-03, 3.1123e-03, 6.3333e-03, 3.7299e-03,\n 1.5953e-02, 1.6468e-02, 2.5355e-04, 9.9471e-05, 1.1112e-03, 6.1182e-03,\n 1.0702e-02, 1.7909e-02, 1.3577e-03, 1.4747e-02, 9.0966e-03, 6.6425e-03,\n 1.2400e-02, 1.7522e-02, 2.6620e-03, 4.5561e-03, 4.0022e-03, 1.7254e-02,\n 6.1405e-06, 1.7645e-02, 1.8053e-02, 1.9364e-02, 2.1054e-02, 1.9905e-02,\n 1.6793e-02, 1.6094e-02, 1.1116e-02, 1.6410e-02, 1.6261e-02, 1.4181e-08,\n 9.1015e-03, 1.8053e-02, 5.7518e-03, 1.5482e-02, 2.0410e-04, 8.1102e-03,\n 1.8365e-02, 1.6995e-02, 1.0036e-03, 1.8236e-02, 1.5998e-02, 1.5762e-02,\n 1.6276e-02, 8.9184e-04, 1.7372e-02, 6.1681e-03, 1.8249e-02, 1.0992e-02,\n 1.4126e-02, 1.3468e-02, 1.8014e-02, 2.9758e-03, 1.7502e-02, 2.8353e-03,\n 1.8404e-02, 1.5680e-02, 1.7732e-02, 1.6440e-02, 1.8413e-02, 4.7115e-03,\n 1.8638e-02, 1.9537e-03, 1.8190e-02, 7.1188e-03, 8.9995e-03, 1.5712e-05,\n 1.6346e-02, 6.8524e-03, 6.3748e-07, 3.1674e-03, 9.1334e-07, 9.3132e-04,\n 5.6603e-03, 1.2174e-02, 1.5160e-02, 1.9009e-02, 1.5638e-02, 4.7673e-03,\n 1.7016e-02, 3.0314e-03, 1.9739e-02, 1.5944e-02, 1.2943e-02, 1.5510e-02,\n 8.6399e-03, 5.6596e-03, 6.2255e-03, 9.5688e-06, 1.6686e-02, 2.0533e-02,\n 1.8495e-02, 4.6201e-03, 4.1361e-03, 2.0182e-02, 3.7870e-03, 1.5436e-02,\n 1.1629e-02, 3.4890e-03, 1.0200e-02, 1.6730e-02, 1.2250e-02, 1.8554e-02,\n 6.7240e-03, 1.4706e-02, 1.7141e-02, 2.3400e-03, 2.9501e-06, 6.1673e-03,\n 7.6003e-03, 1.7310e-02, 1.4075e-02, 1.7002e-02, 1.7218e-02, 1.2266e-02,\n 1.5216e-02, 1.2400e-02, 1.8126e-02, 2.8434e-03, 1.3225e-02, 1.6580e-02,\n 1.9668e-02, 1.6248e-02, 1.6331e-02, 1.6534e-02, 2.1903e-03, 1.7074e-02,\n 2.6222e-03, 1.8165e-02, 1.4966e-02, 1.8902e-02, 1.3500e-02, 1.9297e-02,\n 7.4373e-03, 2.9604e-06, 1.3037e-05, 1.3654e-02, 1.4526e-02, 1.7412e-02,\n 1.7856e-02, 1.8401e-06, 1.7803e-02, 2.2227e-02, 1.4747e-04, 1.8303e-02,\n 3.3472e-03, 9.2779e-08, 1.2950e-06, 1.8184e-02, 1.4094e-02, 1.8324e-02,\n 1.6660e-02, 1.4634e-02, 1.3233e-02, 1.8590e-02, 1.6637e-02, 1.5577e-02,\n 1.6404e-02, 2.8314e-06, 1.8143e-03, 1.1277e-02, 1.6750e-02, 1.8065e-02,\n 2.6220e-06, 1.8991e-03, 1.8023e-02, 1.7743e-02, 1.2555e-02, 2.2086e-02,\n 1.7756e-02, 1.5363e-02, 1.6971e-02, 1.9232e-02, 1.6353e-02, 9.7520e-05,\n 1.6294e-02, 1.4263e-02, 1.6250e-02, 1.3664e-02, 3.4210e-03, 1.7195e-02,\n 2.6770e-06, 3.7029e-09, 2.2782e-03, 1.1965e-03, 5.5402e-03, 1.0826e-05,\n 1.7758e-02, 1.7853e-03, 1.6260e-02, 1.7396e-02, 8.8334e-03, 1.2946e-02,\n 4.7707e-03, 1.8341e-02, 1.5500e-02, 8.6413e-03, 1.4960e-02, 5.0860e-03,\n 1.8427e-02, 1.7727e-02, 1.8552e-02, 1.7232e-02, 1.5772e-02, 1.3550e-02,\n 1.9445e-02, 1.3188e-07, 1.7274e-05, 1.7283e-02, 1.4432e-02, 2.2199e-02,\n 1.2002e-02, 8.0722e-03, 2.4180e-03, 1.4398e-02, 1.2979e-02, 4.3121e-03,\n 7.7428e-03, 3.5046e-05, 1.9069e-05, 1.7086e-02, 1.0107e-02, 1.2835e-02,\n 3.0826e-05, 1.8859e-02, 1.6542e-02, 1.5748e-02, 2.0743e-03, 2.3803e-03,\n 1.6765e-02, 1.3605e-02, 5.1618e-03, 1.0833e-02, 1.4888e-02, 2.4556e-03,\n 1.4536e-02, 1.6998e-02, 9.2375e-03, 2.1265e-02, 2.0489e-03, 9.7723e-04,\n 1.7579e-02, 5.3052e-06, 1.5887e-04, 1.5638e-02, 1.5564e-02, 1.2871e-02,\n 1.6129e-02, 2.4327e-02, 4.8870e-03, 1.5943e-02, 2.0468e-02, 4.0398e-03,\n 1.9566e-02, 2.2624e-02, 1.2940e-02, 2.5075e-03, 1.6921e-02, 1.2306e-02,\n 1.5474e-02, 6.0880e-04, 1.8748e-02, 1.0882e-02, 1.6619e-03, 1.4346e-02,\n 1.6405e-02, 1.0783e-02, 1.7167e-02, 1.4428e-02, 1.5860e-02, 1.2360e-02,\n 1.4884e-09, 3.8985e-03, 6.8472e-03, 1.8204e-02, 1.3519e-02, 2.2676e-02,\n 1.2496e-02, 1.7735e-02, 1.2627e-02, 7.9700e-06, 1.8581e-02, 1.5395e-02,\n 1.4274e-05, 1.6384e-02, 7.2122e-03, 3.6154e-03, 1.5931e-02, 4.0571e-03,\n 1.8472e-02, 2.9771e-09, 2.8081e-03, 1.4730e-02, 1.5367e-02, 2.8912e-03,\n 2.0022e-02, 2.0257e-05, 1.3164e-02, 1.8537e-02, 1.3385e-02, 1.6409e-02,\n 1.2622e-02, 8.2167e-03, 1.4872e-02, 4.0220e-03, 1.2890e-02, 1.4525e-02,\n 5.0494e-04, 1.9957e-06, 4.1671e-04, 1.9199e-02, 3.1829e-07, 1.5725e-02,\n 1.4055e-02, 7.4773e-07, 1.3554e-02, 1.2199e-02, 5.8713e-03, 1.0020e-04,\n 1.9919e-04, 2.3390e-03], device='cuda:0')"
24
  },
25
  "4": {
26
+ "step": "tensor(1252.)",
27
+ "exp_avg": "tensor([[ 3.9230e-04, 8.6013e-05, -4.0562e-28, ..., -8.0357e-06,\n 3.1746e-05, -1.4099e-05],\n [ 4.2048e-04, 1.7068e-04, -2.2865e-28, ..., -1.9681e-05,\n -7.8058e-05, -9.3920e-05],\n [-5.8150e-04, -5.1838e-05, 1.9939e-28, ..., 3.0194e-07,\n 1.7539e-04, -8.2012e-06],\n ...,\n [ 3.6278e-04, 3.8798e-05, 4.8383e-29, ..., -1.4735e-05,\n -9.2320e-06, 1.6069e-05],\n [-3.5278e-04, -2.1873e-04, -1.6439e-28, ..., 3.7640e-05,\n -1.1982e-04, 3.7282e-05],\n [-6.4497e-05, 2.1634e-04, -2.8426e-28, ..., 5.2144e-06,\n 1.2079e-05, 1.8201e-05]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[1.0953e-06, 1.3442e-06, 1.1048e-10, ..., 3.9482e-09, 8.9938e-09,\n 1.8019e-08],\n [2.1084e-06, 2.6551e-06, 1.6841e-10, ..., 1.5030e-09, 2.3215e-08,\n 9.7827e-08],\n [1.8753e-06, 2.5430e-06, 4.6574e-10, ..., 2.7687e-09, 8.8683e-09,\n 6.6008e-08],\n ...,\n [1.7657e-06, 2.8547e-06, 1.5489e-10, ..., 2.8203e-09, 1.1983e-08,\n 6.1273e-08],\n [1.7073e-06, 2.9791e-06, 1.0857e-10, ..., 2.7219e-09, 1.2247e-08,\n 6.0122e-08],\n [2.1537e-06, 3.3493e-06, 1.6559e-10, ..., 2.1838e-09, 2.0388e-08,\n 3.8560e-08]], device='cuda:0')"
 
 
 
 
 
 
 
 
 
 
29
  }
30
  },
31
  "param_groups": [
32
  {
33
+ "lr": 0.0009755527298894294,
34
  "name": "shared",
35
  "betas": [
36
  0.9,
 
45
  "differentiable": false,
46
  "fused": null,
47
  "decoupled_weight_decay": true,
48
+ "initial_lr": 0.001,
49
  "params": [
50
  0,
51
+ 1
 
 
52
  ]
53
  },
54
  {
55
+ "lr": 0.0009755527298894294,
56
  "name": "scale_256",
57
  "betas": [
58
  0.9,
 
67
  "differentiable": false,
68
  "fused": null,
69
  "decoupled_weight_decay": true,
70
+ "initial_lr": 0.001,
71
  "params": [
72
+ 2,
73
+ 3,
74
  4
75
  ]
76
  },
77
  {
78
+ "lr": 0.0009755527298894294,
79
  "name": "scale_512",
80
  "betas": [
81
  0.9,
 
90
  "differentiable": false,
91
  "fused": null,
92
  "decoupled_weight_decay": true,
93
+ "initial_lr": 0.001,
94
+ "params": [
95
+ 5,
96
+ 6,
97
+ 7
98
+ ]
99
+ },
100
+ {
101
+ "lr": 0.0009755527298894294,
102
+ "name": "scale_768",
103
+ "betas": [
104
+ 0.9,
105
+ 0.999
106
+ ],
107
+ "eps": 1e-08,
108
+ "weight_decay": 1e-05,
109
+ "amsgrad": false,
110
+ "maximize": false,
111
+ "foreach": null,
112
+ "capturable": false,
113
+ "differentiable": false,
114
+ "fused": null,
115
+ "decoupled_weight_decay": true,
116
+ "initial_lr": 0.001,
117
  "params": [
118
+ 8,
119
+ 9,
120
+ 10
121
  ]
122
  },
123
  {
124
+ "lr": 0.0009755527298894294,
125
+ "name": "scale_1024",
126
+ "betas": [
127
+ 0.9,
128
+ 0.999
129
+ ],
130
+ "eps": 1e-08,
131
+ "weight_decay": 1e-05,
132
+ "amsgrad": false,
133
+ "maximize": false,
134
+ "foreach": null,
135
+ "capturable": false,
136
+ "differentiable": false,
137
+ "fused": null,
138
+ "decoupled_weight_decay": true,
139
+ "initial_lr": 0.001,
140
+ "params": [
141
+ 11,
142
+ 12,
143
+ 13
144
+ ]
145
+ },
146
+ {
147
+ "lr": 0.0009755527298894294,
148
+ "name": "scale_1280",
149
+ "betas": [
150
+ 0.9,
151
+ 0.999
152
+ ],
153
+ "eps": 1e-08,
154
+ "weight_decay": 1e-05,
155
+ "amsgrad": false,
156
+ "maximize": false,
157
+ "foreach": null,
158
+ "capturable": false,
159
+ "differentiable": false,
160
+ "fused": null,
161
+ "decoupled_weight_decay": true,
162
+ "initial_lr": 0.001,
163
+ "params": [
164
+ 14,
165
+ 15,
166
+ 16
167
+ ]
168
+ },
169
+ {
170
+ "lr": 0.0009755527298894294,
171
+ "name": "scale_1536",
172
+ "betas": [
173
+ 0.9,
174
+ 0.999
175
+ ],
176
+ "eps": 1e-08,
177
+ "weight_decay": 1e-05,
178
+ "amsgrad": false,
179
+ "maximize": false,
180
+ "foreach": null,
181
+ "capturable": false,
182
+ "differentiable": false,
183
+ "fused": null,
184
+ "decoupled_weight_decay": true,
185
+ "initial_lr": 0.001,
186
+ "params": [
187
+ 17,
188
+ 18,
189
+ 19
190
+ ]
191
+ },
192
+ {
193
+ "lr": 0.0009755527298894294,
194
+ "name": "scale_1792",
195
+ "betas": [
196
+ 0.9,
197
+ 0.999
198
+ ],
199
+ "eps": 1e-08,
200
+ "weight_decay": 1e-05,
201
+ "amsgrad": false,
202
+ "maximize": false,
203
+ "foreach": null,
204
+ "capturable": false,
205
+ "differentiable": false,
206
+ "fused": null,
207
+ "decoupled_weight_decay": true,
208
+ "initial_lr": 0.001,
209
+ "params": [
210
+ 20,
211
+ 21,
212
+ 22
213
+ ]
214
+ },
215
+ {
216
+ "lr": 0.0009755527298894294,
217
+ "name": "scale_2048",
218
+ "betas": [
219
+ 0.9,
220
+ 0.999
221
+ ],
222
+ "eps": 1e-08,
223
+ "weight_decay": 1e-05,
224
+ "amsgrad": false,
225
+ "maximize": false,
226
+ "foreach": null,
227
+ "capturable": false,
228
+ "differentiable": false,
229
+ "fused": null,
230
+ "decoupled_weight_decay": true,
231
+ "initial_lr": 0.001,
232
+ "params": [
233
+ 23,
234
+ 24,
235
+ 25
236
+ ]
237
+ },
238
+ {
239
+ "lr": 0.0009755527298894294,
240
+ "name": "scale_2304",
241
+ "betas": [
242
+ 0.9,
243
+ 0.999
244
+ ],
245
+ "eps": 1e-08,
246
+ "weight_decay": 1e-05,
247
+ "amsgrad": false,
248
+ "maximize": false,
249
+ "foreach": null,
250
+ "capturable": false,
251
+ "differentiable": false,
252
+ "fused": null,
253
+ "decoupled_weight_decay": true,
254
+ "initial_lr": 0.001,
255
+ "params": [
256
+ 26,
257
+ 27,
258
+ 28
259
+ ]
260
+ },
261
+ {
262
+ "lr": 0.0009755527298894294,
263
+ "name": "scale_2560",
264
+ "betas": [
265
+ 0.9,
266
+ 0.999
267
+ ],
268
+ "eps": 1e-08,
269
+ "weight_decay": 1e-05,
270
+ "amsgrad": false,
271
+ "maximize": false,
272
+ "foreach": null,
273
+ "capturable": false,
274
+ "differentiable": false,
275
+ "fused": null,
276
+ "decoupled_weight_decay": true,
277
+ "initial_lr": 0.001,
278
+ "params": [
279
+ 29,
280
+ 30,
281
+ 31
282
+ ]
283
+ },
284
+ {
285
+ "lr": 0.00048778860081564085,
286
  "name": "fusion",
287
  "betas": [
288
  0.9,
 
297
  "differentiable": false,
298
  "fused": null,
299
  "decoupled_weight_decay": true,
300
+ "initial_lr": 0.0005,
301
  "params": [
302
+ 32,
303
+ 33,
304
+ 34,
305
+ 35,
306
+ 36,
307
+ 37,
308
+ 38,
309
+ 39,
310
+ 40,
311
+ 41,
312
+ 42,
313
+ 43,
314
+ 44,
315
+ 45,
316
+ 46,
317
+ 47,
318
+ 48,
319
+ 49,
320
+ 50,
321
+ 51,
322
+ 52,
323
+ 53,
324
+ 54,
325
+ 55,
326
+ 56,
327
+ 57,
328
+ 58,
329
+ 59,
330
+ 60,
331
+ 61
332
  ]
333
  }
334
  ]
335
  },
336
  "scheduler_state_dict": {
337
  "T_0": 10,
338
+ "T_i": 10,
339
  "T_mult": 2,
340
  "eta_min": 1e-06,
341
+ "T_cur": 1,
342
  "base_lrs": [
343
+ 0.001,
344
+ 0.001,
345
+ 0.001,
346
+ 0.001,
347
+ 0.001,
348
+ 0.001,
349
+ 0.001,
350
+ 0.001,
351
+ 0.001,
352
+ 0.001,
353
+ 0.001,
354
+ 0.0005
355
  ],
356
+ "last_epoch": 1,
357
  "_step_count": 0,
358
  "_is_initial": false,
359
  "_get_lr_called_within_step": false,
360
  "_last_lr": [
361
+ 0.0009755527298894294,
362
+ 0.0009755527298894294,
363
+ 0.0009755527298894294,
364
+ 0.0009755527298894294,
365
+ 0.0009755527298894294,
366
+ 0.0009755527298894294,
367
+ 0.0009755527298894294,
368
+ 0.0009755527298894294,
369
+ 0.0009755527298894294,
370
+ 0.0009755527298894294,
371
+ 0.0009755527298894294,
372
+ 0.00048778860081564085
373
  ]
374
  },
375
  "metrics": {
376
+ "best_val_acc": 80.786,
377
+ "best_epoch": 0,
378
  "scale_accuracies": {
379
+ "256": 80.786
 
380
  }
381
  },
382
  "train_config": {
383
  "name": "david_training",
384
  "run_id": "20251012_060013",
385
  "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
386
+ "model_variant": "clip_vit_l14",
387
  "num_classes": 1000,
388
+ "preset": "clip_vit_l14_deep",
389
  "custom_config_path": null,
390
  "num_classes_override": null,
391
  "use_belly_override": null,
392
  "belly_expand_override": null,
393
+ "progressive_training_override": true,
394
+ "scale_warmup_epochs_override": {
395
+ "256": 0,
396
+ "512": 1,
397
+ "768": 2,
398
+ "1024": 3,
399
+ "1280": 4,
400
+ "1536": 5,
401
+ "1792": 6,
402
+ "2048": 7,
403
+ "2304": 8,
404
+ "2560": 9
405
+ },
406
  "num_epochs": 10,
407
  "batch_size": 1024,
408
+ "learning_rate": 0.001,
409
  "weight_decay": 1e-05,
410
  "warmup_epochs": 0,
411
  "use_rose_loss": true,
weights/david_config.json CHANGED
@@ -1,29 +1,45 @@
1
  {
2
- "name": "david_small_fast",
3
- "uid": "c.david.small_fast",
4
- "feature_dim": 512,
5
  "num_classes": 1000,
6
  "scales": [
7
  256,
8
- 512
 
 
 
 
 
 
 
 
9
  ],
10
- "sharing_mode": "fully_shared",
11
- "fusion_mode": "weighted_sum",
12
- "use_belly": false,
13
  "belly_expand": 2.0,
14
- "shared_feature_dim": 512,
15
- "shared_layers": 1,
16
  "shared_dropout": 0.1,
17
  "fusion_temperature": 1.0,
18
  "fusion_dropout": 0.1,
19
  "tree_depth": 3,
20
- "num_experts": 3,
21
  "compression_ratio": 4,
22
  "expert_dropout": 0.1,
23
  "attention_dropout": 0.1,
24
- "progressive_training": false,
25
  "scale_warmup_epochs": {
26
  "256": 0,
27
- "512": 0
 
 
 
 
 
 
 
 
28
  }
29
  }
 
1
  {
2
+ "name": "david_clip_vit_l14_deep",
3
+ "uid": "c.david.clip_vit_l14_deep",
4
+ "feature_dim": 768,
5
  "num_classes": 1000,
6
  "scales": [
7
  256,
8
+ 512,
9
+ 768,
10
+ 1024,
11
+ 1280,
12
+ 1536,
13
+ 1792,
14
+ 2048,
15
+ 2304,
16
+ 2560
17
  ],
18
+ "sharing_mode": "partial_shared",
19
+ "fusion_mode": "deep_efficiency",
20
+ "use_belly": true,
21
  "belly_expand": 2.0,
22
+ "shared_feature_dim": 1024,
23
+ "shared_layers": 4,
24
  "shared_dropout": 0.1,
25
  "fusion_temperature": 1.0,
26
  "fusion_dropout": 0.1,
27
  "tree_depth": 3,
28
+ "num_experts": 4,
29
  "compression_ratio": 4,
30
  "expert_dropout": 0.1,
31
  "attention_dropout": 0.1,
32
+ "progressive_training": true,
33
  "scale_warmup_epochs": {
34
  "256": 0,
35
+ "512": 1,
36
+ "768": 2,
37
+ "1024": 3,
38
+ "1280": 4,
39
+ "1536": 5,
40
+ "1792": 6,
41
+ "2048": 7,
42
+ "2304": 8,
43
+ "2560": 9
44
  }
45
  }
weights/train_config.json CHANGED
@@ -2,18 +2,29 @@
2
  "name": "david_training",
3
  "run_id": "20251012_060013",
4
  "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
5
- "model_variant": "clip_vit_b16",
6
  "num_classes": 1000,
7
- "preset": "small_fast",
8
  "custom_config_path": null,
9
  "num_classes_override": null,
10
  "use_belly_override": null,
11
  "belly_expand_override": null,
12
- "progressive_training_override": false,
13
- "scale_warmup_epochs_override": null,
 
 
 
 
 
 
 
 
 
 
 
14
  "num_epochs": 10,
15
  "batch_size": 1024,
16
- "learning_rate": 0.01,
17
  "weight_decay": 1e-05,
18
  "warmup_epochs": 0,
19
  "use_rose_loss": true,
 
2
  "name": "david_training",
3
  "run_id": "20251012_060013",
4
  "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
5
+ "model_variant": "clip_vit_l14",
6
  "num_classes": 1000,
7
+ "preset": "clip_vit_l14_deep",
8
  "custom_config_path": null,
9
  "num_classes_override": null,
10
  "use_belly_override": null,
11
  "belly_expand_override": null,
12
+ "progressive_training_override": true,
13
+ "scale_warmup_epochs_override": {
14
+ "256": 0,
15
+ "512": 1,
16
+ "768": 2,
17
+ "1024": 3,
18
+ "1280": 4,
19
+ "1536": 5,
20
+ "1792": 6,
21
+ "2048": 7,
22
+ "2304": 8,
23
+ "2560": 9
24
+ },
25
  "num_epochs": 10,
26
  "batch_size": 1024,
27
+ "learning_rate": 0.001,
28
  "weight_decay": 1e-05,
29
  "warmup_epochs": 0,
30
  "use_rose_loss": true,