Michnik committed on
Commit
87c6483
·
verified ·
1 Parent(s): 918dbd9

Training in progress, step 10000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:897a2d76b46c37666339a32f0e70836619e29545db41fd7abc14ad6a6fd8ea24
3
  size 4682414560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a72aea82d33a0df376039af26e4fb5cea26ea4417d8c78ed85fc757bb6a39ca
3
  size 4682414560
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bf8f34d230bfa09c07aa594b3482bd9e2f1f6cd0b2ab88dc019359c8235308f
3
  size 2498736801
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ea46decce79789e325e0aa6d402cfa5b28b69b0c3e037f2a401da9623e1a137
3
  size 2498736801
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50132d3e2de48497717bc53ff4eacaef759623dee33ac6c4bcd02871b0690b2f
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:227e3caad3f787fbe810fbac3c378957e1394c039f30869d94e84d08288a0af5
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e24dc0c109295a9322692845301bb3e884fbf9ab2ee06ac92f5d40034eb5191
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad0bfa5100128a3b17f1f0f23b4edee8f727028cbe6ad3d5850d5d9861a5a8b4
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.005758854485037549,
6
  "eval_steps": 500,
7
- "global_step": 9500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1338,6 +1338,76 @@
1338
  "learning_rate": 9.942435716043681e-05,
1339
  "loss": 3.2088824462890626,
1340
  "step": 9500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1341
  }
1342
  ],
1343
  "logging_steps": 50,
@@ -1357,7 +1427,7 @@
1357
  "attributes": {}
1358
  }
1359
  },
1360
- "total_flos": 3.56581258564608e+16,
1361
  "train_batch_size": 1,
1362
  "trial_name": null,
1363
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.006061952089513209,
6
  "eval_steps": 500,
7
+ "global_step": 10000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1338
  "learning_rate": 9.942435716043681e-05,
1339
  "loss": 3.2088824462890626,
1340
  "step": 9500
1341
+ },
1342
+ {
1343
+ "epoch": 0.005789164245485115,
1344
+ "grad_norm": 1.6147823333740234,
1345
+ "learning_rate": 9.942132618508106e-05,
1346
+ "loss": 3.2871713256835937,
1347
+ "step": 9550
1348
+ },
1349
+ {
1350
+ "epoch": 0.005819474005932681,
1351
+ "grad_norm": 1.877769947052002,
1352
+ "learning_rate": 9.94182952097253e-05,
1353
+ "loss": 3.21473876953125,
1354
+ "step": 9600
1355
+ },
1356
+ {
1357
+ "epoch": 0.005849783766380247,
1358
+ "grad_norm": 2.283907413482666,
1359
+ "learning_rate": 9.941526423436957e-05,
1360
+ "loss": 3.3842514038085936,
1361
+ "step": 9650
1362
+ },
1363
+ {
1364
+ "epoch": 0.0058800935268278134,
1365
+ "grad_norm": 1.681667685508728,
1366
+ "learning_rate": 9.941223325901382e-05,
1367
+ "loss": 3.2447711181640626,
1368
+ "step": 9700
1369
+ },
1370
+ {
1371
+ "epoch": 0.005910403287275379,
1372
+ "grad_norm": 1.9879530668258667,
1373
+ "learning_rate": 9.940920228365808e-05,
1374
+ "loss": 3.2267584228515624,
1375
+ "step": 9750
1376
+ },
1377
+ {
1378
+ "epoch": 0.005940713047722945,
1379
+ "grad_norm": 2.1548056602478027,
1380
+ "learning_rate": 9.940617130830233e-05,
1381
+ "loss": 3.269433898925781,
1382
+ "step": 9800
1383
+ },
1384
+ {
1385
+ "epoch": 0.005971022808170511,
1386
+ "grad_norm": 1.723276138305664,
1387
+ "learning_rate": 9.940314033294658e-05,
1388
+ "loss": 3.36541748046875,
1389
+ "step": 9850
1390
+ },
1391
+ {
1392
+ "epoch": 0.006001332568618077,
1393
+ "grad_norm": 2.4308693408966064,
1394
+ "learning_rate": 9.940010935759084e-05,
1395
+ "loss": 3.1993017578125,
1396
+ "step": 9900
1397
+ },
1398
+ {
1399
+ "epoch": 0.006031642329065643,
1400
+ "grad_norm": 2.3158278465270996,
1401
+ "learning_rate": 9.93970783822351e-05,
1402
+ "loss": 3.168118896484375,
1403
+ "step": 9950
1404
+ },
1405
+ {
1406
+ "epoch": 0.006061952089513209,
1407
+ "grad_norm": 1.8310009241104126,
1408
+ "learning_rate": 9.939410802638646e-05,
1409
+ "loss": 3.175491943359375,
1410
+ "step": 10000
1411
  }
1412
  ],
1413
  "logging_steps": 50,
 
1427
  "attributes": {}
1428
  }
1429
  },
1430
+ "total_flos": 3.755622008506368e+16,
1431
  "train_batch_size": 1,
1432
  "trial_name": null,
1433
  "trial_params": null