jmmr-8282 commited on
Commit
928a334
·
verified ·
1 Parent(s): 6529fc4

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4488cbc56499a0fd5a6541431111a9dfd93133d037941f061e0b46133fb4a5a
3
  size 437958624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a2e34018327439bd68c02f0672eaeafee3606887a581e0c6415dd24446b1f34
3
  size 437958624
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6df815422d05de057db5f09faffcbabac714e2a453917053fd9ff048f0b2aa54
3
- size 15597
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5d22a235930e2c29bda06919eee14e9df56f4d9eace1bf3b1736ecf8212ea7
3
+ size 4741923
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bcbaa2aabec41ab6ddbecc0bb29c7ea2c5176ca5f2b9b4caa2a00b191411de1
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fb7e7f0e32311ea4a34bdd12b1dc8ec66d46164a59b314eea8415d8682591cf
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34ea89449ac93a3391a38a417e2dd400eff8832c6d069777a257c68b16d10335
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8457a345e0d53a37b5f1ebb6db1120270f6a7f4b99a871d4acda8980a58382cc
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fbd75b8ebb26bc9af4014ad504a07ec158976c6e00388b7f420c4414a2d5005
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef3af56a55052e98aa5beb6c4c90cc3520c9d76d06c8b7bd66364a88c166c795
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,102 +1,28 @@
1
  {
2
- "best_global_step": 3910,
3
- "best_metric": 1.284986972808838,
4
- "best_model_checkpoint": "./bert-imdb-cp/checkpoint-3910",
5
- "epoch": 5.0,
6
  "eval_steps": 500,
7
- "global_step": 3910,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.639386189258312,
14
- "grad_norm": 3.338480234146118,
15
- "learning_rate": 1.7447570332480818e-05,
16
- "loss": 1.374810546875,
17
  "step": 500
18
  },
19
  {
20
  "epoch": 1.0,
21
- "eval_loss": 1.3370563983917236,
22
- "eval_runtime": 470.9034,
23
- "eval_samples_per_second": 53.089,
24
- "eval_steps_per_second": 1.661,
25
  "step": 782
26
- },
27
- {
28
- "epoch": 1.278772378516624,
29
- "grad_norm": 1.7996962070465088,
30
- "learning_rate": 1.489002557544757e-05,
31
- "loss": 1.3492745361328125,
32
- "step": 1000
33
- },
34
- {
35
- "epoch": 1.918158567774936,
36
- "grad_norm": 8.485663414001465,
37
- "learning_rate": 1.2332480818414323e-05,
38
- "loss": 1.33461083984375,
39
- "step": 1500
40
- },
41
- {
42
- "epoch": 2.0,
43
- "eval_loss": 1.3118603229522705,
44
- "eval_runtime": 470.0542,
45
- "eval_samples_per_second": 53.185,
46
- "eval_steps_per_second": 1.664,
47
- "step": 1564
48
- },
49
- {
50
- "epoch": 2.557544757033248,
51
- "grad_norm": 3.0998706817626953,
52
- "learning_rate": 9.774936061381075e-06,
53
- "loss": 1.3203563232421875,
54
- "step": 2000
55
- },
56
- {
57
- "epoch": 3.0,
58
- "eval_loss": 1.296446681022644,
59
- "eval_runtime": 470.8225,
60
- "eval_samples_per_second": 53.099,
61
- "eval_steps_per_second": 1.661,
62
- "step": 2346
63
- },
64
- {
65
- "epoch": 3.1969309462915603,
66
- "grad_norm": 5.162097454071045,
67
- "learning_rate": 7.217391304347827e-06,
68
- "loss": 1.3065758056640624,
69
- "step": 2500
70
- },
71
- {
72
- "epoch": 3.836317135549872,
73
- "grad_norm": 6.115355491638184,
74
- "learning_rate": 4.659846547314578e-06,
75
- "loss": 1.3031380615234376,
76
- "step": 3000
77
- },
78
- {
79
- "epoch": 4.0,
80
- "eval_loss": 1.287236213684082,
81
- "eval_runtime": 470.4081,
82
- "eval_samples_per_second": 53.145,
83
- "eval_steps_per_second": 1.662,
84
- "step": 3128
85
- },
86
- {
87
- "epoch": 4.475703324808184,
88
- "grad_norm": 2.687530755996704,
89
- "learning_rate": 2.10230179028133e-06,
90
- "loss": 1.2987099609375,
91
- "step": 3500
92
- },
93
- {
94
- "epoch": 5.0,
95
- "eval_loss": 1.284986972808838,
96
- "eval_runtime": 470.8474,
97
- "eval_samples_per_second": 53.096,
98
- "eval_steps_per_second": 1.661,
99
- "step": 3910
100
  }
101
  ],
102
  "logging_steps": 500,
@@ -111,12 +37,12 @@
111
  "should_evaluate": false,
112
  "should_log": false,
113
  "should_save": true,
114
- "should_training_stop": true
115
  },
116
  "attributes": {}
117
  }
118
  },
119
- "total_flos": 3.287503158960144e+16,
120
  "train_batch_size": 32,
121
  "trial_name": null,
122
  "trial_params": null
 
1
  {
2
+ "best_global_step": 782,
3
+ "best_metric": 1.0078198909759521,
4
+ "best_model_checkpoint": "./bert-imdb-cp/checkpoint-782",
5
+ "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 782,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.639386189258312,
14
+ "grad_norm": 2.5200116634368896,
15
+ "learning_rate": 8.723785166240409e-06,
16
+ "loss": 1.263746337890625,
17
  "step": 500
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_loss": 1.0078198909759521,
22
+ "eval_runtime": 448.448,
23
+ "eval_samples_per_second": 55.748,
24
+ "eval_steps_per_second": 1.744,
25
  "step": 782
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  ],
28
  "logging_steps": 500,
 
37
  "should_evaluate": false,
38
  "should_log": false,
39
  "should_save": true,
40
+ "should_training_stop": false
41
  },
42
  "attributes": {}
43
  }
44
  },
45
+ "total_flos": 6575737273320960.0,
46
  "train_batch_size": 32,
47
  "trial_name": null,
48
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3efe3b88791da124abe68a3d5201ca5d762d3f8f30b2bdebb5050a9036093793
3
  size 5201
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1107345434efc608151ecaa36a5f772158c66da373d39ab11cc42f09d2bcbda3
3
  size 5201