jmmr-8282 commited on
Commit
1281fe1
·
verified ·
1 Parent(s): 357dd7f

Training in progress, epoch 5, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c056c3378f5d58d4584775046ce97b8d16de39aa589668238c1423d31b1662b
3
  size 437958624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e792c3c8839f229a0d3e7a010747c64818b39d76aab77c91becbf500e46fe70c
3
  size 437958624
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df12f0a471877cef6d700eb1c766ae445f98309a61a39a10186a2b6aeebe1eb4
3
  size 4741923
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1094d23f68bffc5d257d8a449b9354b1be2476eca95964bbfab629b7e5c6dcd0
3
  size 4741923
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2d553173fafc1568aba563e8b2ebc65b521fc7ac5272baf9b3276c61a1b3955
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bcbaa2aabec41ab6ddbecc0bb29c7ea2c5176ca5f2b9b4caa2a00b191411de1
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0635f5ee8287266c95252541d63a4925053f9e404f1dbe420921c5ed9e5141ff
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34ea89449ac93a3391a38a417e2dd400eff8832c6d069777a257c68b16d10335
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e9663833f02fe99567660dbbf033273979d059bd123e7db38679c72f2ea5889
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:804847f8dfb379b4e4a341f8b67e61c497566f1895ecba99d0357e804b6e2e3b
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 3128,
3
- "best_metric": 0.7798203825950623,
4
- "best_model_checkpoint": "./bert-imdb-cp/checkpoint-3128",
5
- "epoch": 4.0,
6
  "eval_steps": 500,
7
- "global_step": 3128,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -82,6 +82,21 @@
82
  "eval_samples_per_second": 55.783,
83
  "eval_steps_per_second": 1.745,
84
  "step": 3128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  }
86
  ],
87
  "logging_steps": 500,
@@ -96,12 +111,12 @@
96
  "should_evaluate": false,
97
  "should_log": false,
98
  "should_save": true,
99
- "should_training_stop": false
100
  },
101
  "attributes": {}
102
  }
103
  },
104
- "total_flos": 2.63008072048488e+16,
105
  "train_batch_size": 32,
106
  "trial_name": null,
107
  "trial_params": null
 
1
  {
2
+ "best_global_step": 3910,
3
+ "best_metric": 0.773769736289978,
4
+ "best_model_checkpoint": "./bert-imdb-cp/checkpoint-3910",
5
+ "epoch": 5.0,
6
  "eval_steps": 500,
7
+ "global_step": 3910,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
82
  "eval_samples_per_second": 55.783,
83
  "eval_steps_per_second": 1.745,
84
  "step": 3128
85
+ },
86
+ {
87
+ "epoch": 4.475703324808184,
88
+ "grad_norm": 1.3006492853164673,
89
+ "learning_rate": 1.051150895140665e-06,
90
+ "loss": 0.822345947265625,
91
+ "step": 3500
92
+ },
93
+ {
94
+ "epoch": 5.0,
95
+ "eval_loss": 0.773769736289978,
96
+ "eval_runtime": 447.9571,
97
+ "eval_samples_per_second": 55.809,
98
+ "eval_steps_per_second": 1.746,
99
+ "step": 3910
100
  }
101
  ],
102
  "logging_steps": 500,
 
111
  "should_evaluate": false,
112
  "should_log": false,
113
  "should_save": true,
114
+ "should_training_stop": true
115
  },
116
  "attributes": {}
117
  }
118
  },
119
+ "total_flos": 3.287503158960144e+16,
120
  "train_batch_size": 32,
121
  "trial_name": null,
122
  "trial_params": null