Michnik commited on
Commit
d60af8a
·
verified ·
1 Parent(s): dedbc40

Training in progress, step 11000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e6652d4f4cf847b56ee09f55ddc4aa71f4b641ef2a2d758460b17d2b05b1154
3
  size 4682414560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f7ef68707a5f5b8d06ff149d9ae9f755003b6075ac855fea279d57ada67d327
3
  size 4682414560
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22c36679d5b9dae75f3ef6d4ee031e17c0bdffe90022cc65eb8474f3006a513d
3
  size 2498736801
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a11644c814da57640facf34ba5391afecb738e0fa2fc22650c666da3e040419f
3
  size 2498736801
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc4678e09f8fcc61d92df0e65077038de31aeb262232e6a2dbf1a3ffba70ea64
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c178d88dd64302d0da55c078b7a67c25e5a7c7b6abe69bb648b63b0b5924756a
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd3aba3a7d5dd6e1fe4ca9aeb5413dce931776ed2a811c0c689ce3b6ea4e2b48
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9d3f6fc42961c8152448a892f3924f49c78cdff3507356794e0f3ef837d1210
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.00636504969398887,
6
  "eval_steps": 500,
7
- "global_step": 10500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1478,6 +1478,76 @@
1478
  "learning_rate": 9.936379827282901e-05,
1479
  "loss": 3.2523733520507814,
1480
  "step": 10500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1481
  }
1482
  ],
1483
  "logging_steps": 50,
@@ -1497,7 +1567,7 @@
1497
  "attributes": {}
1498
  }
1499
  },
1500
- "total_flos": 3.94406667566039e+16,
1501
  "train_batch_size": 1,
1502
  "trial_name": null,
1503
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.00666814729846453,
6
  "eval_steps": 500,
7
+ "global_step": 11000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1478
  "learning_rate": 9.936379827282901e-05,
1479
  "loss": 3.2523733520507814,
1480
  "step": 10500
1481
+ },
1482
+ {
1483
+ "epoch": 0.006395359454436436,
1484
+ "grad_norm": 2.3049190044403076,
1485
+ "learning_rate": 9.936076729747326e-05,
1486
+ "loss": 3.269808044433594,
1487
+ "step": 10550
1488
+ },
1489
+ {
1490
+ "epoch": 0.006425669214884002,
1491
+ "grad_norm": 1.8658502101898193,
1492
+ "learning_rate": 9.935773632211751e-05,
1493
+ "loss": 3.2778192138671876,
1494
+ "step": 10600
1495
+ },
1496
+ {
1497
+ "epoch": 0.006455978975331568,
1498
+ "grad_norm": 2.3192155361175537,
1499
+ "learning_rate": 9.935470534676176e-05,
1500
+ "loss": 3.1481027221679687,
1501
+ "step": 10650
1502
+ },
1503
+ {
1504
+ "epoch": 0.006486288735779134,
1505
+ "grad_norm": 1.8061124086380005,
1506
+ "learning_rate": 9.935167437140603e-05,
1507
+ "loss": 3.1986090087890626,
1508
+ "step": 10700
1509
+ },
1510
+ {
1511
+ "epoch": 0.0065165984962267,
1512
+ "grad_norm": 2.502110004425049,
1513
+ "learning_rate": 9.934864339605028e-05,
1514
+ "loss": 3.0882998657226564,
1515
+ "step": 10750
1516
+ },
1517
+ {
1518
+ "epoch": 0.006546908256674266,
1519
+ "grad_norm": 2.817471504211426,
1520
+ "learning_rate": 9.934561242069454e-05,
1521
+ "loss": 3.1530471801757813,
1522
+ "step": 10800
1523
+ },
1524
+ {
1525
+ "epoch": 0.006577218017121832,
1526
+ "grad_norm": 2.1066269874572754,
1527
+ "learning_rate": 9.934258144533879e-05,
1528
+ "loss": 3.195494384765625,
1529
+ "step": 10850
1530
+ },
1531
+ {
1532
+ "epoch": 0.006607527777569398,
1533
+ "grad_norm": 2.0811686515808105,
1534
+ "learning_rate": 9.933955046998305e-05,
1535
+ "loss": 3.16893310546875,
1536
+ "step": 10900
1537
+ },
1538
+ {
1539
+ "epoch": 0.006637837538016964,
1540
+ "grad_norm": 1.965430736541748,
1541
+ "learning_rate": 9.93365194946273e-05,
1542
+ "loss": 3.1695233154296876,
1543
+ "step": 10950
1544
+ },
1545
+ {
1546
+ "epoch": 0.00666814729846453,
1547
+ "grad_norm": 2.6144657135009766,
1548
+ "learning_rate": 9.933348851927155e-05,
1549
+ "loss": 3.2110235595703127,
1550
+ "step": 11000
1551
  }
1552
  ],
1553
  "logging_steps": 50,
 
1567
  "attributes": {}
1568
  }
1569
  },
1570
+ "total_flos": 4.130209137310925e+16,
1571
  "train_batch_size": 1,
1572
  "trial_name": null,
1573
  "trial_params": null