jmmr-8282 committed on
Commit
49f525a
·
verified ·
1 Parent(s): a9dd8ba

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7606954d9b149ad6da4ba7287d3eca4aab7f96e7e0c05af1ef72c6718c0665d1
3
  size 437961700
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4de66461b285186046f05cf8c9930e5ada0e3020ca4762399d59313044475ff5
3
  size 437961700
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8efe5909e5060fea686e30328d0c0f21c5eb98f603f6b70922a4788ae72129f7
3
  size 118180619
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84ecf8f3a21cd9c5465c5646b40ff679305ca73e42a836475588610795c8282d
3
  size 118180619
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96231dbbd61b433bb6d6121f51c259cf0e1ad0a076de7ae1c77f190e1ce69384
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30cd7f2e948438b68553097ba0a0eb4a894fda376f31a677f8238066dd201d3d
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f4fed85ea0917cc2d05bf8af0c7463301adc898027ff20864aac2aa94a9e56b
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa8f9c79c40c52add04fc8f64e1f918436a34eb087c6b1500ed8db075d6f0e89
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8d5acdd64798810c4bd3cf19ecd254b731043662f14025cd0a76112a9a3e21b
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47d461e0a24e49e017f7b0d3a73c58b356814d52598c84ba4f408c3edc8c3527
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 392,
3
  "best_metric": 1.9849951267242432,
4
  "best_model_checkpoint": "./bert-resume/checkpoint-392",
5
- "epoch": 2.0,
6
  "eval_steps": 500,
7
- "global_step": 392,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -24,6 +24,21 @@
24
  "eval_samples_per_second": 53.842,
25
  "eval_steps_per_second": 1.684,
26
  "step": 392
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
28
  ],
29
  "logging_steps": 500,
@@ -43,7 +58,7 @@
43
  "attributes": {}
44
  }
45
  },
46
- "total_flos": 3284181680080896.0,
47
  "train_batch_size": 32,
48
  "trial_name": null,
49
  "trial_params": null
 
2
  "best_global_step": 392,
3
  "best_metric": 1.9849951267242432,
4
  "best_model_checkpoint": "./bert-resume/checkpoint-392",
5
+ "epoch": 3.0,
6
  "eval_steps": 500,
7
+ "global_step": 588,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
24
  "eval_samples_per_second": 53.842,
25
  "eval_steps_per_second": 1.684,
26
  "step": 392
27
+ },
28
+ {
29
+ "epoch": 2.5510204081632653,
30
+ "grad_norm": 7.228590965270996,
31
+ "learning_rate": 1.7454081632653063e-05,
32
+ "loss": 1.8102947998046874,
33
+ "step": 500
34
+ },
35
+ {
36
+ "epoch": 3.0,
37
+ "eval_loss": 2.102543830871582,
38
+ "eval_runtime": 33.1614,
39
+ "eval_samples_per_second": 53.044,
40
+ "eval_steps_per_second": 1.659,
41
+ "step": 588
42
  }
43
  ],
44
  "logging_steps": 500,
 
58
  "attributes": {}
59
  }
60
  },
61
+ "total_flos": 4926272520121344.0,
62
  "train_batch_size": 32,
63
  "trial_name": null,
64
  "trial_params": null