Michnik commited on
Commit
a03df93
·
verified ·
1 Parent(s): ab8abc1

Training in progress, step 10500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a72aea82d33a0df376039af26e4fb5cea26ea4417d8c78ed85fc757bb6a39ca
3
  size 4682414560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e6652d4f4cf847b56ee09f55ddc4aa71f4b641ef2a2d758460b17d2b05b1154
3
  size 4682414560
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ea46decce79789e325e0aa6d402cfa5b28b69b0c3e037f2a401da9623e1a137
3
  size 2498736801
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22c36679d5b9dae75f3ef6d4ee031e17c0bdffe90022cc65eb8474f3006a513d
3
  size 2498736801
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:227e3caad3f787fbe810fbac3c378957e1394c039f30869d94e84d08288a0af5
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc4678e09f8fcc61d92df0e65077038de31aeb262232e6a2dbf1a3ffba70ea64
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad0bfa5100128a3b17f1f0f23b4edee8f727028cbe6ad3d5850d5d9861a5a8b4
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd3aba3a7d5dd6e1fe4ca9aeb5413dce931776ed2a811c0c689ce3b6ea4e2b48
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.006061952089513209,
6
  "eval_steps": 500,
7
- "global_step": 10000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1408,6 +1408,76 @@
1408
  "learning_rate": 9.939410802638646e-05,
1409
  "loss": 3.175491943359375,
1410
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1411
  }
1412
  ],
1413
  "logging_steps": 50,
@@ -1427,7 +1497,7 @@
1427
  "attributes": {}
1428
  }
1429
  },
1430
- "total_flos": 3.755622008506368e+16,
1431
  "train_batch_size": 1,
1432
  "trial_name": null,
1433
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.00636504969398887,
6
  "eval_steps": 500,
7
+ "global_step": 10500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1408
  "learning_rate": 9.939410802638646e-05,
1409
  "loss": 3.175491943359375,
1410
  "step": 10000
1411
+ },
1412
+ {
1413
+ "epoch": 0.006092261849960775,
1414
+ "grad_norm": 2.3004519939422607,
1415
+ "learning_rate": 9.939107705103073e-05,
1416
+ "loss": 3.217908935546875,
1417
+ "step": 10050
1418
+ },
1419
+ {
1420
+ "epoch": 0.006122571610408341,
1421
+ "grad_norm": 2.0357086658477783,
1422
+ "learning_rate": 9.938804607567496e-05,
1423
+ "loss": 3.2446975708007812,
1424
+ "step": 10100
1425
+ },
1426
+ {
1427
+ "epoch": 0.006152881370855907,
1428
+ "grad_norm": 2.126295566558838,
1429
+ "learning_rate": 9.938501510031923e-05,
1430
+ "loss": 3.1879031372070314,
1431
+ "step": 10150
1432
+ },
1433
+ {
1434
+ "epoch": 0.0061831911313034735,
1435
+ "grad_norm": 1.7915022373199463,
1436
+ "learning_rate": 9.938198412496348e-05,
1437
+ "loss": 3.098807373046875,
1438
+ "step": 10200
1439
+ },
1440
+ {
1441
+ "epoch": 0.00621350089175104,
1442
+ "grad_norm": 2.8946573734283447,
1443
+ "learning_rate": 9.937895314960774e-05,
1444
+ "loss": 3.3009078979492186,
1445
+ "step": 10250
1446
+ },
1447
+ {
1448
+ "epoch": 0.006243810652198606,
1449
+ "grad_norm": 2.3917036056518555,
1450
+ "learning_rate": 9.937592217425199e-05,
1451
+ "loss": 3.362381591796875,
1452
+ "step": 10300
1453
+ },
1454
+ {
1455
+ "epoch": 0.006274120412646172,
1456
+ "grad_norm": 2.2558183670043945,
1457
+ "learning_rate": 9.937289119889624e-05,
1458
+ "loss": 3.2506768798828123,
1459
+ "step": 10350
1460
+ },
1461
+ {
1462
+ "epoch": 0.006304430173093738,
1463
+ "grad_norm": 1.747912883758545,
1464
+ "learning_rate": 9.93698602235405e-05,
1465
+ "loss": 3.1590435791015623,
1466
+ "step": 10400
1467
+ },
1468
+ {
1469
+ "epoch": 0.006334739933541304,
1470
+ "grad_norm": 2.056442975997925,
1471
+ "learning_rate": 9.936682924818475e-05,
1472
+ "loss": 3.1585858154296873,
1473
+ "step": 10450
1474
+ },
1475
+ {
1476
+ "epoch": 0.00636504969398887,
1477
+ "grad_norm": 2.221165895462036,
1478
+ "learning_rate": 9.936379827282901e-05,
1479
+ "loss": 3.2523733520507814,
1480
+ "step": 10500
1481
  }
1482
  ],
1483
  "logging_steps": 50,
 
1497
  "attributes": {}
1498
  }
1499
  },
1500
+ "total_flos": 3.94406667566039e+16,
1501
  "train_batch_size": 1,
1502
  "trial_name": null,
1503
  "trial_params": null