jmmr-8282 committed on
Commit
ecad9bb
·
verified ·
1 Parent(s): bcbb53f

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -13,15 +13,17 @@
13
  "hidden_dropout_prob": 0.1,
14
  "hidden_size": 768,
15
  "id2label": {
16
- "0": "Safe Email",
17
- "1": "Phishing Email"
 
18
  },
19
  "initializer_range": 0.02,
20
  "intermediate_size": 3072,
21
  "is_decoder": false,
22
  "label2id": {
23
- "Phishing Email": 1,
24
- "Safe Email": 0
 
25
  },
26
  "layer_norm_eps": 1e-12,
27
  "max_position_embeddings": 512,
 
13
  "hidden_dropout_prob": 0.1,
14
  "hidden_size": 768,
15
  "id2label": {
16
+ "0": "No Fit",
17
+ "1": "Potential Fit",
18
+ "2": "Good Fit"
19
  },
20
  "initializer_range": 0.02,
21
  "intermediate_size": 3072,
22
  "is_decoder": false,
23
  "label2id": {
24
+ "0": "No Fit",
25
+ "1": "Potential Fit",
26
+ "2": "Good Fit"
27
  },
28
  "layer_norm_eps": 1e-12,
29
  "max_position_embeddings": 512,
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b66535ff6415d2c31921f3fe356b0fac561719725dcb426dd1c3a9773e770c5
3
- size 437958624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcc41825dd4e2dda5ab59a18fb15d2bc6d7ebc016b0d6aad6f14401fee55dc77
3
+ size 437961700
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f45e4c9cef7b3d793962401a30cc5375b49e5f6fa7d66e64cc389d199fe8102b
3
- size 4741923
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a4aba44d4545ce5bc2feb3d532db36ad6fd06c32b17949196f677fd6387137f
3
+ size 4748067
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f2e8413b53592d1e0dd4c602cd67bd1e7cb9a9ee96e69486272827e44496b7a
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73adb01bcea0cecd8da970a42e3125d42bd82dec94c1b2c5a1cf209bdb4e6dbe
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd34198e30d2e33cdb497643e8b4ce22778c6cc08096a422300c51708bdf5a02
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0861073ade655d14fb72097d362c2e813ed5d5faa50345ce5bc8f77fc935cfd8
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:695b5daec62904ea9fe48fd42c554ece0bc33b8a351b379641efd77afed9c35f
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc113105be61c25bb07c129ade488bf1ad8df712e596a6c750dd2672b51f8951
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,162 +1,27 @@
1
  {
2
- "best_global_step": 4670,
3
- "best_metric": 0.45520129799842834,
4
- "best_model_checkpoint": "./bert-email/checkpoint-4670",
5
- "epoch": 10.0,
6
  "eval_steps": 500,
7
- "global_step": 4670,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_loss": 0.9388861656188965,
15
- "eval_runtime": 66.9874,
16
- "eval_samples_per_second": 55.682,
17
- "eval_steps_per_second": 1.747,
18
- "step": 467
19
- },
20
- {
21
- "epoch": 1.0706638115631693,
22
- "grad_norm": 3.0445573329925537,
23
- "learning_rate": 7.862955032119916e-06,
24
- "loss": 1.1060218505859376,
25
- "step": 500
26
- },
27
- {
28
- "epoch": 2.0,
29
- "eval_loss": 0.7270694971084595,
30
- "eval_runtime": 66.8664,
31
- "eval_samples_per_second": 55.783,
32
- "eval_steps_per_second": 1.75,
33
- "step": 934
34
- },
35
- {
36
- "epoch": 2.1413276231263385,
37
- "grad_norm": 5.564135551452637,
38
- "learning_rate": 5.721627408993576e-06,
39
- "loss": 0.8154515380859375,
40
- "step": 1000
41
- },
42
- {
43
- "epoch": 3.0,
44
- "eval_loss": 0.6182085871696472,
45
- "eval_runtime": 66.9212,
46
- "eval_samples_per_second": 55.737,
47
- "eval_steps_per_second": 1.748,
48
- "step": 1401
49
- },
50
- {
51
- "epoch": 3.2119914346895073,
52
- "grad_norm": 2.9384329319000244,
53
- "learning_rate": 3.580299785867238e-06,
54
- "loss": 0.6724396362304688,
55
- "step": 1500
56
- },
57
- {
58
- "epoch": 4.0,
59
- "eval_loss": 0.5720958113670349,
60
- "eval_runtime": 66.8887,
61
- "eval_samples_per_second": 55.764,
62
- "eval_steps_per_second": 1.749,
63
- "step": 1868
64
- },
65
- {
66
- "epoch": 4.282655246252677,
67
- "grad_norm": 1.6766972541809082,
68
- "learning_rate": 1.4389721627408994e-06,
69
- "loss": 0.6073400268554687,
70
- "step": 2000
71
- },
72
- {
73
- "epoch": 5.0,
74
- "eval_loss": 0.5627617239952087,
75
- "eval_runtime": 66.8913,
76
- "eval_samples_per_second": 55.762,
77
- "eval_steps_per_second": 1.749,
78
- "step": 2335
79
- },
80
- {
81
- "epoch": 5.353319057815845,
82
- "grad_norm": 1.2230651378631592,
83
- "learning_rate": 4.648822269807281e-06,
84
- "loss": 0.5784992564808239,
85
- "step": 2500
86
- },
87
- {
88
- "epoch": 6.0,
89
- "eval_loss": 0.5057228207588196,
90
- "eval_runtime": 66.8732,
91
- "eval_samples_per_second": 55.777,
92
- "eval_steps_per_second": 1.75,
93
- "step": 2802
94
- },
95
- {
96
- "epoch": 6.423982869379015,
97
- "grad_norm": 1.8150324821472168,
98
- "learning_rate": 3.5781584582441113e-06,
99
- "loss": 0.54178466796875,
100
- "step": 3000
101
- },
102
- {
103
- "epoch": 7.0,
104
- "eval_loss": 0.4829275608062744,
105
- "eval_runtime": 66.8232,
106
- "eval_samples_per_second": 55.819,
107
- "eval_steps_per_second": 1.751,
108
- "step": 3269
109
- },
110
- {
111
- "epoch": 7.494646680942184,
112
- "grad_norm": 1.125893235206604,
113
- "learning_rate": 2.5074946466809425e-06,
114
- "loss": 0.5138602294921875,
115
- "step": 3500
116
- },
117
- {
118
- "epoch": 8.0,
119
- "eval_loss": 0.46736541390419006,
120
- "eval_runtime": 66.835,
121
- "eval_samples_per_second": 55.809,
122
- "eval_steps_per_second": 1.751,
123
- "step": 3736
124
- },
125
- {
126
- "epoch": 8.565310492505354,
127
- "grad_norm": 1.139656662940979,
128
- "learning_rate": 1.4368308351177733e-06,
129
- "loss": 0.48643429565429686,
130
- "step": 4000
131
- },
132
- {
133
- "epoch": 9.0,
134
- "eval_loss": 0.4572421610355377,
135
- "eval_runtime": 66.7921,
136
- "eval_samples_per_second": 55.845,
137
- "eval_steps_per_second": 1.752,
138
- "step": 4203
139
- },
140
- {
141
- "epoch": 9.635974304068522,
142
- "grad_norm": 1.6265816688537598,
143
- "learning_rate": 3.661670235546039e-07,
144
- "loss": 0.4844812927246094,
145
- "step": 4500
146
- },
147
- {
148
- "epoch": 10.0,
149
- "eval_loss": 0.45520129799842834,
150
- "eval_runtime": 66.8894,
151
- "eval_samples_per_second": 55.764,
152
- "eval_steps_per_second": 1.749,
153
- "step": 4670
154
  }
155
  ],
156
  "logging_steps": 500,
157
- "max_steps": 4670,
158
  "num_input_tokens_seen": 0,
159
- "num_train_epochs": 10,
160
  "save_steps": 500,
161
  "stateful_callbacks": {
162
  "TrainerControl": {
@@ -165,12 +30,12 @@
165
  "should_evaluate": false,
166
  "should_log": false,
167
  "should_save": true,
168
- "should_training_stop": true
169
  },
170
  "attributes": {}
171
  }
172
  },
173
- "total_flos": 3.947507785777152e+16,
174
  "train_batch_size": 32,
175
  "trial_name": null,
176
  "trial_params": null
 
1
  {
2
+ "best_global_step": 196,
3
+ "best_metric": 2.137895345687866,
4
+ "best_model_checkpoint": "./bert-resume/checkpoint-196",
5
+ "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 196,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_loss": 2.137895345687866,
15
+ "eval_runtime": 30.7966,
16
+ "eval_samples_per_second": 57.117,
17
+ "eval_steps_per_second": 1.786,
18
+ "step": 196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "logging_steps": 500,
22
+ "max_steps": 1568,
23
  "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 8,
25
  "save_steps": 500,
26
  "stateful_callbacks": {
27
  "TrainerControl": {
 
30
  "should_evaluate": false,
31
  "should_log": false,
32
  "should_save": true,
33
+ "should_training_stop": false
34
  },
35
  "attributes": {}
36
  }
37
  },
38
+ "total_flos": 1642090840040448.0,
39
  "train_batch_size": 32,
40
  "trial_name": null,
41
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e4ddd5ae672fe01abbc5d9c911cbb2a3272aa3019c09d3f6c0be52a5687dd28
3
  size 5201
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b89ecfe7b715ea3f4f7f68441cad26801d1c51092cde3c13c8ced041b2a9b66
3
  size 5201