Upload 3 files
Browse files- models/nanogpt_history.json +380 -0
- models/nanogpt_indus.pt +3 -0
- models/ngram_model.pkl +3 -0
models/nanogpt_history.json
ADDED
|
@@ -0,0 +1,380 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 1,
|
| 4 |
+
"train": 3.1541355614151274,
|
| 5 |
+
"val": 2.6805355599586,
|
| 6 |
+
"ppl": 14.592906579896784
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"epoch": 2,
|
| 10 |
+
"train": 2.661188569185989,
|
| 11 |
+
"val": 2.6236555132460087,
|
| 12 |
+
"ppl": 13.786026590221859
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"epoch": 3,
|
| 16 |
+
"train": 2.6222057228109668,
|
| 17 |
+
"val": 2.6189262702109968,
|
| 18 |
+
"ppl": 13.72098304458602
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"epoch": 4,
|
| 22 |
+
"train": 2.6041080951690674,
|
| 23 |
+
"val": 2.623061422337877,
|
| 24 |
+
"ppl": 13.777838869531374
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 5,
|
| 28 |
+
"train": 2.593652198623334,
|
| 29 |
+
"val": 2.612872798392113,
|
| 30 |
+
"ppl": 13.638174353311316
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 6,
|
| 34 |
+
"train": 2.5864927619695663,
|
| 35 |
+
"val": 2.6153726057803377,
|
| 36 |
+
"ppl": 13.672309810579614
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 7,
|
| 40 |
+
"train": 2.5816431431365863,
|
| 41 |
+
"val": 2.617009090616348,
|
| 42 |
+
"ppl": 13.694702656036387
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"epoch": 8,
|
| 46 |
+
"train": 2.578338403786932,
|
| 47 |
+
"val": 2.600845827701244,
|
| 48 |
+
"ppl": 13.475130855096179
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"epoch": 9,
|
| 52 |
+
"train": 2.574481935905559,
|
| 53 |
+
"val": 2.5990774035453796,
|
| 54 |
+
"ppl": 13.451322166323555
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"epoch": 10,
|
| 58 |
+
"train": 2.573306300810405,
|
| 59 |
+
"val": 2.6109495010781796,
|
| 60 |
+
"ppl": 13.611969297345375
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"epoch": 11,
|
| 64 |
+
"train": 2.5699439996055196,
|
| 65 |
+
"val": 2.6043824705671756,
|
| 66 |
+
"ppl": 13.522871952346348
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 12,
|
| 70 |
+
"train": 2.5692843917225088,
|
| 71 |
+
"val": 2.6044237677087176,
|
| 72 |
+
"ppl": 13.523430419834895
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 13,
|
| 76 |
+
"train": 2.567502531622137,
|
| 77 |
+
"val": 2.6015377919724645,
|
| 78 |
+
"ppl": 13.484458390986532
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"epoch": 14,
|
| 82 |
+
"train": 2.5648566955434426,
|
| 83 |
+
"val": 2.6039607372689755,
|
| 84 |
+
"ppl": 13.51717010936978
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"epoch": 15,
|
| 88 |
+
"train": 2.564982747925179,
|
| 89 |
+
"val": 2.606540412344831,
|
| 90 |
+
"ppl": 13.552085031411677
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"epoch": 16,
|
| 94 |
+
"train": 2.565142649891121,
|
| 95 |
+
"val": 2.6077234592843563,
|
| 96 |
+
"ppl": 13.568127271622808
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"epoch": 17,
|
| 100 |
+
"train": 2.561980778484472,
|
| 101 |
+
"val": 2.5994231764306415,
|
| 102 |
+
"ppl": 13.45597407300484
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"epoch": 18,
|
| 106 |
+
"train": 2.563110330541219,
|
| 107 |
+
"val": 2.6023097253860312,
|
| 108 |
+
"ppl": 13.49487151358394
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"epoch": 19,
|
| 112 |
+
"train": 2.561759041356189,
|
| 113 |
+
"val": 2.5976937146897012,
|
| 114 |
+
"ppl": 13.43272259272678
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 20,
|
| 118 |
+
"train": 2.560820518593703,
|
| 119 |
+
"val": 2.5989098967389856,
|
| 120 |
+
"ppl": 13.449069167007325
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"epoch": 21,
|
| 124 |
+
"train": 2.5614392177334855,
|
| 125 |
+
"val": 2.5982438769746334,
|
| 126 |
+
"ppl": 13.440114803352545
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"epoch": 22,
|
| 130 |
+
"train": 2.558592631348542,
|
| 131 |
+
"val": 2.6015114023330366,
|
| 132 |
+
"ppl": 13.48410254568705
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"epoch": 23,
|
| 136 |
+
"train": 2.5598560087382793,
|
| 137 |
+
"val": 2.594668352857549,
|
| 138 |
+
"ppl": 13.392145158153562
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"epoch": 24,
|
| 142 |
+
"train": 2.5588360891810487,
|
| 143 |
+
"val": 2.5956707951870372,
|
| 144 |
+
"ppl": 13.405576742412281
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"epoch": 25,
|
| 148 |
+
"train": 2.5575959836798057,
|
| 149 |
+
"val": 2.6016628754899855,
|
| 150 |
+
"ppl": 13.486145179966494
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 26,
|
| 154 |
+
"train": 2.5573083627969027,
|
| 155 |
+
"val": 2.6081511226106198,
|
| 156 |
+
"ppl": 13.573931103017417
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 27,
|
| 160 |
+
"train": 2.5575086841625825,
|
| 161 |
+
"val": 2.6015568743360804,
|
| 162 |
+
"ppl": 13.48471570877982
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"epoch": 28,
|
| 166 |
+
"train": 2.557002873026899,
|
| 167 |
+
"val": 2.602048819369458,
|
| 168 |
+
"ppl": 13.491351079684325
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"epoch": 29,
|
| 172 |
+
"train": 2.5560219013797387,
|
| 173 |
+
"val": 2.610209694568147,
|
| 174 |
+
"ppl": 13.601902797934866
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"epoch": 30,
|
| 178 |
+
"train": 2.554686253624303,
|
| 179 |
+
"val": 2.6000392982300293,
|
| 180 |
+
"ppl": 13.46426714647255
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"epoch": 31,
|
| 184 |
+
"train": 2.5538660226655856,
|
| 185 |
+
"val": 2.5992509849528047,
|
| 186 |
+
"ppl": 13.453657268416205
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"epoch": 32,
|
| 190 |
+
"train": 2.5537610626114264,
|
| 191 |
+
"val": 2.5968120820978853,
|
| 192 |
+
"ppl": 13.420885085624993
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"epoch": 33,
|
| 196 |
+
"train": 2.555091032758355,
|
| 197 |
+
"val": 2.6032118112482925,
|
| 198 |
+
"ppl": 13.50705053882672
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 34,
|
| 202 |
+
"train": 2.5560004072529927,
|
| 203 |
+
"val": 2.5948864348391267,
|
| 204 |
+
"ppl": 13.395066062193925
|
| 205 |
+
},
|
| 206 |
+
{
|
| 207 |
+
"epoch": 35,
|
| 208 |
+
"train": 2.552427670785359,
|
| 209 |
+
"val": 2.593587578611171,
|
| 210 |
+
"ppl": 13.37767909124727
|
| 211 |
+
},
|
| 212 |
+
{
|
| 213 |
+
"epoch": 36,
|
| 214 |
+
"train": 2.552689385733434,
|
| 215 |
+
"val": 2.603117510359338,
|
| 216 |
+
"ppl": 13.505776872008656
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"epoch": 37,
|
| 220 |
+
"train": 2.5538388692906926,
|
| 221 |
+
"val": 2.596836513661324,
|
| 222 |
+
"ppl": 13.42121298283587
|
| 223 |
+
},
|
| 224 |
+
{
|
| 225 |
+
"epoch": 38,
|
| 226 |
+
"train": 2.5519965682178736,
|
| 227 |
+
"val": 2.591266675198332,
|
| 228 |
+
"ppl": 13.34666679238455
|
| 229 |
+
},
|
| 230 |
+
{
|
| 231 |
+
"epoch": 39,
|
| 232 |
+
"train": 2.5516749138810804,
|
| 233 |
+
"val": 2.592155272656299,
|
| 234 |
+
"ppl": 13.358531877430071
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"epoch": 40,
|
| 238 |
+
"train": 2.5518498348870446,
|
| 239 |
+
"val": 2.603394275015973,
|
| 240 |
+
"ppl": 13.509515311017257
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 41,
|
| 244 |
+
"train": 2.550012855125325,
|
| 245 |
+
"val": 2.596443589697493,
|
| 246 |
+
"ppl": 13.415940502540996
|
| 247 |
+
},
|
| 248 |
+
{
|
| 249 |
+
"epoch": 42,
|
| 250 |
+
"train": 2.5510117949119637,
|
| 251 |
+
"val": 2.5943850392990924,
|
| 252 |
+
"ppl": 13.38835151927348
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"epoch": 43,
|
| 256 |
+
"train": 2.55065877762224,
|
| 257 |
+
"val": 2.6017530053219895,
|
| 258 |
+
"ppl": 13.487360738744176
|
| 259 |
+
},
|
| 260 |
+
{
|
| 261 |
+
"epoch": 44,
|
| 262 |
+
"train": 2.550856338414763,
|
| 263 |
+
"val": 2.5932302386202712,
|
| 264 |
+
"ppl": 13.372899565531243
|
| 265 |
+
},
|
| 266 |
+
{
|
| 267 |
+
"epoch": 45,
|
| 268 |
+
"train": 2.5491439992828027,
|
| 269 |
+
"val": 2.5965981445413955,
|
| 270 |
+
"ppl": 13.418014161374055
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"epoch": 46,
|
| 274 |
+
"train": 2.549261734155672,
|
| 275 |
+
"val": 2.6010512138934847,
|
| 276 |
+
"ppl": 13.47789874514592
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"epoch": 47,
|
| 280 |
+
"train": 2.548883476427623,
|
| 281 |
+
"val": 2.5964993251130934,
|
| 282 |
+
"ppl": 13.41668826639885
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 48,
|
| 286 |
+
"train": 2.5490686427801847,
|
| 287 |
+
"val": 2.601477122053187,
|
| 288 |
+
"ppl": 13.483640314801011
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"epoch": 49,
|
| 292 |
+
"train": 2.5496274632002627,
|
| 293 |
+
"val": 2.596776560265967,
|
| 294 |
+
"ppl": 13.42040835966793
|
| 295 |
+
},
|
| 296 |
+
{
|
| 297 |
+
"epoch": 50,
|
| 298 |
+
"train": 2.549438276993377,
|
| 299 |
+
"val": 2.60137410747244,
|
| 300 |
+
"ppl": 13.482251374788804
|
| 301 |
+
},
|
| 302 |
+
{
|
| 303 |
+
"epoch": 51,
|
| 304 |
+
"train": 2.547333244766508,
|
| 305 |
+
"val": 2.5961117275217744,
|
| 306 |
+
"ppl": 13.411488998020356
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"epoch": 52,
|
| 310 |
+
"train": 2.547550984259163,
|
| 311 |
+
"val": 2.603273559123912,
|
| 312 |
+
"ppl": 13.507884596253747
|
| 313 |
+
},
|
| 314 |
+
{
|
| 315 |
+
"epoch": 53,
|
| 316 |
+
"train": 2.5475961520735706,
|
| 317 |
+
"val": 2.606535277468093,
|
| 318 |
+
"ppl": 13.55201544330416
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"epoch": 54,
|
| 322 |
+
"train": 2.5470066059912955,
|
| 323 |
+
"val": 2.6017496788755374,
|
| 324 |
+
"ppl": 13.48731587383552
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 55,
|
| 328 |
+
"train": 2.544834245528494,
|
| 329 |
+
"val": 2.5987642572281207,
|
| 330 |
+
"ppl": 13.447110593778538
|
| 331 |
+
},
|
| 332 |
+
{
|
| 333 |
+
"epoch": 56,
|
| 334 |
+
"train": 2.5448448940047195,
|
| 335 |
+
"val": 2.608545351535716,
|
| 336 |
+
"ppl": 13.579283394230176
|
| 337 |
+
},
|
| 338 |
+
{
|
| 339 |
+
"epoch": 57,
|
| 340 |
+
"train": 2.5467686969786882,
|
| 341 |
+
"val": 2.597899075518263,
|
| 342 |
+
"ppl": 13.43548143103723
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"epoch": 58,
|
| 346 |
+
"train": 2.5460665178086077,
|
| 347 |
+
"val": 2.597285856591894,
|
| 348 |
+
"ppl": 13.427245065144662
|
| 349 |
+
},
|
| 350 |
+
{
|
| 351 |
+
"epoch": 59,
|
| 352 |
+
"train": 2.5445679192032133,
|
| 353 |
+
"val": 2.5995786671942853,
|
| 354 |
+
"ppl": 13.458066515362539
|
| 355 |
+
},
|
| 356 |
+
{
|
| 357 |
+
"epoch": 60,
|
| 358 |
+
"train": 2.544777887208121,
|
| 359 |
+
"val": 2.603335055899113,
|
| 360 |
+
"ppl": 13.508715313139149
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"epoch": 61,
|
| 364 |
+
"train": 2.544875491144402,
|
| 365 |
+
"val": 2.6060757269250585,
|
| 366 |
+
"ppl": 13.545789038031518
|
| 367 |
+
},
|
| 368 |
+
{
|
| 369 |
+
"epoch": 62,
|
| 370 |
+
"train": 2.5440730787813663,
|
| 371 |
+
"val": 2.603364035170129,
|
| 372 |
+
"ppl": 13.50910679153364
|
| 373 |
+
},
|
| 374 |
+
{
|
| 375 |
+
"epoch": 63,
|
| 376 |
+
"train": 2.5455543795334443,
|
| 377 |
+
"val": 2.5958931395348084,
|
| 378 |
+
"ppl": 13.408557728019938
|
| 379 |
+
}
|
| 380 |
+
]
|
models/nanogpt_indus.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed49e090e63ad228880bc1b8802c5fa9f99cb2af6a836bf4552a456c152ae3fb
|
| 3 |
+
size 625493
|
models/ngram_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a61af5701396600eb9c86492c1f9d667fd98f5a3f2926762bab9af262a6f1cf
|
| 3 |
+
size 139795
|