wtd committed on
Commit
8a33c3e
·
verified ·
1 Parent(s): 1d78c48

End of training

Browse files
Files changed (3) hide show
  1. README.md +41 -41
  2. model.safetensors +1 -1
  3. tokenizer.json +53 -53
README.md CHANGED
@@ -14,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 0.5500
18
 
19
  ## Model description
20
 
@@ -45,46 +45,46 @@ The following hyperparameters were used during training:
45
 
46
  | Training Loss | Epoch | Step | Validation Loss |
47
  |:-------------:|:-----:|:----:|:---------------:|
48
- | 3.4851 | 1.0 | 5 | 2.9408 |
49
- | 2.5835 | 2.0 | 10 | 2.1787 |
50
- | 1.9668 | 3.0 | 15 | 1.7919 |
51
- | 1.7209 | 4.0 | 20 | 1.6411 |
52
- | 1.5973 | 5.0 | 25 | 1.5588 |
53
- | 1.5383 | 6.0 | 30 | 1.4900 |
54
- | 1.4628 | 7.0 | 35 | 1.4556 |
55
- | 1.4427 | 8.0 | 40 | 1.4175 |
56
- | 1.3979 | 9.0 | 45 | 1.3629 |
57
- | 1.3421 | 10.0 | 50 | 1.2893 |
58
- | 1.2395 | 11.0 | 55 | 1.1524 |
59
- | 1.1789 | 12.0 | 60 | 1.1692 |
60
- | 1.1266 | 13.0 | 65 | 1.0435 |
61
- | 1.0469 | 14.0 | 70 | 0.9994 |
62
- | 1.0149 | 15.0 | 75 | 1.0576 |
63
- | 1.0010 | 16.0 | 80 | 0.9336 |
64
- | 0.9451 | 17.0 | 85 | 0.8939 |
65
- | 0.9079 | 18.0 | 90 | 0.8588 |
66
- | 0.8922 | 19.0 | 95 | 0.8718 |
67
- | 0.8619 | 20.0 | 100 | 0.8359 |
68
- | 0.8458 | 21.0 | 105 | 0.7859 |
69
- | 0.8284 | 22.0 | 110 | 0.7901 |
70
- | 0.8187 | 23.0 | 115 | 0.7792 |
71
- | 0.7879 | 24.0 | 120 | 0.7507 |
72
- | 0.7589 | 25.0 | 125 | 0.7389 |
73
- | 0.7497 | 26.0 | 130 | 0.7087 |
74
- | 0.7263 | 27.0 | 135 | 0.6955 |
75
- | 0.7082 | 28.0 | 140 | 0.6706 |
76
- | 0.6982 | 29.0 | 145 | 0.6740 |
77
- | 0.6815 | 30.0 | 150 | 0.6415 |
78
- | 0.6655 | 31.0 | 155 | 0.6237 |
79
- | 0.6555 | 32.0 | 160 | 0.6191 |
80
- | 0.6435 | 33.0 | 165 | 0.6018 |
81
- | 0.6303 | 34.0 | 170 | 0.5913 |
82
- | 0.6235 | 35.0 | 175 | 0.5822 |
83
- | 0.6098 | 36.0 | 180 | 0.5722 |
84
- | 0.6044 | 37.0 | 185 | 0.5670 |
85
- | 0.5995 | 38.0 | 190 | 0.5563 |
86
- | 0.5915 | 39.0 | 195 | 0.5555 |
87
- | 0.5864 | 40.0 | 200 | 0.5500 |
88
 
89
 
90
  ### Framework versions
 
14
 
15
  This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 0.6688
18
 
19
  ## Model description
20
 
 
45
 
46
  | Training Loss | Epoch | Step | Validation Loss |
47
  |:-------------:|:-----:|:----:|:---------------:|
48
+ | 3.4045 | 1.0 | 6 | 2.7587 |
49
+ | 2.3917 | 2.0 | 12 | 1.9900 |
50
+ | 1.8734 | 3.0 | 18 | 1.6958 |
51
+ | 1.6324 | 4.0 | 24 | 1.6081 |
52
+ | 1.5676 | 5.0 | 30 | 1.5619 |
53
+ | 1.5436 | 6.0 | 36 | 1.6197 |
54
+ | 1.5139 | 7.0 | 42 | 1.4991 |
55
+ | 1.4614 | 8.0 | 48 | 1.4779 |
56
+ | 1.4407 | 9.0 | 54 | 1.4234 |
57
+ | 1.3644 | 10.0 | 60 | 1.3460 |
58
+ | 1.3096 | 11.0 | 66 | 1.3823 |
59
+ | 1.2634 | 12.0 | 72 | 1.2711 |
60
+ | 1.1912 | 13.0 | 78 | 1.2382 |
61
+ | 1.1856 | 14.0 | 84 | 1.1337 |
62
+ | 1.1019 | 15.0 | 90 | 1.2100 |
63
+ | 1.1441 | 16.0 | 96 | 1.1382 |
64
+ | 1.0611 | 17.0 | 102 | 1.0282 |
65
+ | 0.9967 | 18.0 | 108 | 0.9920 |
66
+ | 0.9765 | 19.0 | 114 | 0.9946 |
67
+ | 0.9517 | 20.0 | 120 | 0.9478 |
68
+ | 0.9374 | 21.0 | 126 | 0.9441 |
69
+ | 0.8931 | 22.0 | 132 | 0.9748 |
70
+ | 0.8756 | 23.0 | 138 | 0.8511 |
71
+ | 0.8523 | 24.0 | 144 | 0.8759 |
72
+ | 0.8757 | 25.0 | 150 | 0.8253 |
73
+ | 0.8209 | 26.0 | 156 | 0.8182 |
74
+ | 0.8190 | 27.0 | 162 | 0.7820 |
75
+ | 0.7795 | 28.0 | 168 | 0.7740 |
76
+ | 0.8097 | 29.0 | 174 | 0.7571 |
77
+ | 0.7626 | 30.0 | 180 | 0.7584 |
78
+ | 0.7491 | 31.0 | 186 | 0.7444 |
79
+ | 0.7320 | 32.0 | 192 | 0.7177 |
80
+ | 0.7235 | 33.0 | 198 | 0.7124 |
81
+ | 0.7145 | 34.0 | 204 | 0.7032 |
82
+ | 0.7085 | 35.0 | 210 | 0.6888 |
83
+ | 0.7138 | 36.0 | 216 | 0.6866 |
84
+ | 0.6910 | 37.0 | 222 | 0.6789 |
85
+ | 0.6801 | 38.0 | 228 | 0.6731 |
86
+ | 0.6819 | 39.0 | 234 | 0.6715 |
87
+ | 0.6750 | 40.0 | 240 | 0.6688 |
88
 
89
 
90
  ### Framework versions
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:953334a2882f1b73612ab6395f352673190781e4f503bb027afc517c6ee080c8
3
  size 31205552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b8bb882d932f050659ba49f3b6b56b5605ef404cb44129b57bd382a88094cb
3
  size 31205552
tokenizer.json CHANGED
@@ -113,63 +113,59 @@
113
  "7": 13,
114
  "8": 14,
115
  "9": 15,
116
- "10": 16,
117
- "99": 17,
118
  "11": 18,
119
  "98": 19,
120
- "97": 20,
121
- "12": 21,
122
- "96": 22,
123
- "13": 23,
124
- "95": 24,
125
- "14": 25,
126
- "15": 26,
127
- "94": 27,
128
- "93": 28,
129
- "16": 29,
130
- "17": 30,
131
- "92": 31,
132
  "18": 32,
133
  "91": 33,
134
  "19": 34,
135
  "90": 35,
136
- "20": 36,
137
- "89": 37,
138
- "88": 38,
139
- "21": 39,
140
- "22": 40,
141
- "87": 41,
142
- "86": 42,
143
- "23": 43,
144
  "24": 44,
145
- "25": 45,
146
- "84": 46,
147
- "85": 47,
148
  "26": 48,
149
  "83": 49,
150
  "27": 50,
151
  "82": 51
152
  },
153
  "merges": [
154
- [
155
- "1",
156
- "0"
157
- ],
158
  [
159
  "9",
160
  "9"
161
  ],
162
  [
163
  "1",
164
- "1"
165
  ],
166
  [
167
- "9",
168
- "8"
169
  ],
170
  [
171
  "9",
172
- "7"
173
  ],
174
  [
175
  "1",
@@ -177,7 +173,7 @@
177
  ],
178
  [
179
  "9",
180
- "6"
181
  ],
182
  [
183
  "1",
@@ -185,14 +181,14 @@
185
  ],
186
  [
187
  "9",
188
- "5"
189
  ],
190
  [
191
  "1",
192
  "4"
193
  ],
194
  [
195
- "1",
196
  "5"
197
  ],
198
  [
@@ -200,21 +196,25 @@
200
  "4"
201
  ],
202
  [
203
- "9",
204
- "3"
205
  ],
206
  [
207
  "1",
208
  "6"
209
  ],
210
  [
211
- "1",
212
- "7"
213
  ],
214
  [
215
  "9",
216
  "2"
217
  ],
 
 
 
 
218
  [
219
  "1",
220
  "8"
@@ -231,29 +231,33 @@
231
  "9",
232
  "0"
233
  ],
 
 
 
 
234
  [
235
  "2",
236
  "0"
237
  ],
238
  [
239
- "8",
240
- "9"
241
  ],
242
  [
243
  "8",
244
  "8"
245
  ],
246
  [
247
- "2",
248
- "1"
249
  ],
250
  [
251
  "2",
252
  "2"
253
  ],
254
  [
255
- "8",
256
- "7"
257
  ],
258
  [
259
  "8",
@@ -261,11 +265,11 @@
261
  ],
262
  [
263
  "2",
264
- "3"
265
  ],
266
  [
267
- "2",
268
- "4"
269
  ],
270
  [
271
  "2",
@@ -275,10 +279,6 @@
275
  "8",
276
  "4"
277
  ],
278
- [
279
- "8",
280
- "5"
281
- ],
282
  [
283
  "2",
284
  "6"
 
113
  "7": 13,
114
  "8": 14,
115
  "9": 15,
116
+ "99": 16,
117
+ "10": 17,
118
  "11": 18,
119
  "98": 19,
120
+ "12": 20,
121
+ "97": 21,
122
+ "13": 22,
123
+ "96": 23,
124
+ "14": 24,
125
+ "95": 25,
126
+ "94": 26,
127
+ "15": 27,
128
+ "16": 28,
129
+ "93": 29,
130
+ "92": 30,
131
+ "17": 31,
132
  "18": 32,
133
  "91": 33,
134
  "19": 34,
135
  "90": 35,
136
+ "89": 36,
137
+ "20": 37,
138
+ "21": 38,
139
+ "88": 39,
140
+ "87": 40,
141
+ "22": 41,
142
+ "23": 42,
143
+ "86": 43,
144
  "24": 44,
145
+ "85": 45,
146
+ "25": 46,
147
+ "84": 47,
148
  "26": 48,
149
  "83": 49,
150
  "27": 50,
151
  "82": 51
152
  },
153
  "merges": [
 
 
 
 
154
  [
155
  "9",
156
  "9"
157
  ],
158
  [
159
  "1",
160
+ "0"
161
  ],
162
  [
163
+ "1",
164
+ "1"
165
  ],
166
  [
167
  "9",
168
+ "8"
169
  ],
170
  [
171
  "1",
 
173
  ],
174
  [
175
  "9",
176
+ "7"
177
  ],
178
  [
179
  "1",
 
181
  ],
182
  [
183
  "9",
184
+ "6"
185
  ],
186
  [
187
  "1",
188
  "4"
189
  ],
190
  [
191
+ "9",
192
  "5"
193
  ],
194
  [
 
196
  "4"
197
  ],
198
  [
199
+ "1",
200
+ "5"
201
  ],
202
  [
203
  "1",
204
  "6"
205
  ],
206
  [
207
+ "9",
208
+ "3"
209
  ],
210
  [
211
  "9",
212
  "2"
213
  ],
214
+ [
215
+ "1",
216
+ "7"
217
+ ],
218
  [
219
  "1",
220
  "8"
 
231
  "9",
232
  "0"
233
  ],
234
+ [
235
+ "8",
236
+ "9"
237
+ ],
238
  [
239
  "2",
240
  "0"
241
  ],
242
  [
243
+ "2",
244
+ "1"
245
  ],
246
  [
247
  "8",
248
  "8"
249
  ],
250
  [
251
+ "8",
252
+ "7"
253
  ],
254
  [
255
  "2",
256
  "2"
257
  ],
258
  [
259
+ "2",
260
+ "3"
261
  ],
262
  [
263
  "8",
 
265
  ],
266
  [
267
  "2",
268
+ "4"
269
  ],
270
  [
271
+ "8",
272
+ "5"
273
  ],
274
  [
275
  "2",
 
279
  "8",
280
  "4"
281
  ],
 
 
 
 
282
  [
283
  "2",
284
  "6"