update model
Browse files- .gitattributes +2 -0
- added_tokens.json +1 -0
- config.json +25 -0
- events.out.tfevents.1631471189.blg4302.int.ets1.calculquebec.ca.240020.0 +3 -0
- job-25031358-head.out +500 -0
- job-25031358-tail.out +0 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- vocab.txt +0 -0
.gitattributes
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
events.out.tfevents.1631471189.blg4302.int.ets1.calculquebec.ca.240020.0 filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
|
added_tokens.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"$pi$": 30522, "$vert$": 30523, "$alpha$": 30524, "$minus$": 30525, "$p$": 30526, "$frac$": 30527, "$q$": 30528, "$lt$": 30529, "$1$": 30530, "$($": 30531, "$2$": 30532, "$supscript$": 30533, "$)$": 30534, "$varepsilon$": 30535, "$x$": 30536, "$subscript$": 30537, "${$": 30538, "$n$": 30539, "$}$": 30540, "$v$": 30541, "$a$": 30542, "$r$": 30543, "$rightarrow$": 30544, "$infty$": 30545, "$omega$": 30546, "$equal$": 30547, "$[$": 30548, "$comma$": 30549, "$]$": 30550, "$\\begin{cases}$": 30551, "$column$": 30552, "$0$": 30553, "$le$": 30554, "$row$": 30555, "$ast$": 30556, "$\\end{cases}$": 30557, "$qquad$": 30558, "$in$": 30559, "$e$": 30560, "$to$": 30561, "$l$": 30562, "$phi$": 30563, "$colon$": 30564, "$z$": 30565, "$i$": 30566, "$5$": 30567, "$plus$": 30568, "$cong$": 30569, "$b$": 30570, "$mod$": 30571, "$ker$": 30572, "$mapsto$": 30573, "$equiv$": 30574, "$subset$": 30575, "$f$": 30576, "$y$": 30577, "$sin$": 30578, "$times$": 30579, "$cos$": 30580, "$t$": 30581, "$float$": 30582, "$fact$": 30583, "$3$": 30584, "$dots$": 30585, "$k$": 30586, "$sum$": 30587, "$gt$": 30588, "$\\left($": 30589, "$\\right)$": 30590, "$m$": 30591, "$ldots$": 30592, "$9$": 30593, "$leftarrow$": 30594, "$prime$": 30595, "$h$": 30596, "$approx$": 30597, "$root$": 30598, "$pm$": 30599, "$c$": 30600, "$d$": 30601, "$beta$": 30602, "$ge$": 30603, "$g$": 30604, "$epsilon$": 30605, "$j$": 30606, "$u$": 30607, "$aleph$": 30608, "$neq$": 30609, "$cdots$": 30610, "$lambda$": 30611, "$mu$": 30612, "$nu$": 30613, "$geq$": 30614, "$\\{$": 30615, "$\\}$": 30616, "$s$": 30617, "$subseteq$": 30618, "$max$": 30619, "$kappa$": 30620, "$oplus$": 30621, "$dim$": 30622, "$prod$": 30623, "$8$": 30624, "$somenum$": 30625, "$circ$": 30626, "$4$": 30627, "$7$": 30628, "$forall$": 30629, "$ne$": 30630, "$o$": 30631, "$w$": 30632, "$arg$": 30633, "$exp$": 30634, "$6$": 30635, "$\\begin{pmatrix}$": 30636, "$\\end{pmatrix}$": 30637, "$theta$": 30638, "$tan$": 30639, "$semicolon$": 30640, "$quad$": 30641, "$newline$": 30642, "$ni$": 30643, "$cup$": 30644, "$varpi$": 30645, "$vee$": 30646, "$langle$": 30647, "$rangle$": 30648, "$delta$": 30649, "$gg$": 30650, "$log$": 30651, "$\\lceil$": 30652, "$\\rceil$": 30653, "$\\begin{array}$": 30654, "$\\end{array}$": 30655, "$pr$": 30656, "$bigcap$": 30657, "$sim$": 30658, "$lim$": 30659, "$\\left [$": 30660, "$\\right ]$": 30661, "$\\left ($": 30662, "$\\right )$": 30663, "$longmapsto$": 30664, "$rvect$": 30665, "$rho$": 30666, "$int$": 30667, "$nabla$": 30668, "$sigma$": 30669, "$cap$": 30670, "$iff$": 30671, "$\\over$": 30672, "$wedge$": 30673, "$bigoplus$": 30674, "$otimes$": 30675, "$partial$": 30676, "$\\begin{bmatrix}$": 30677, "$\\end{bmatrix}$": 30678, "$\\left\\langle$": 30679, "$\\right\\rangle$": 30680, "$\\left\\$": 30681, "$\\right\\$": 30682, "$\\left|$": 30683, "$\\right|$": 30684, "$chi$": 30685, "$\\left.$": 30686, "$psi$": 30687, "$gamma$": 30688, "$cot$": 30689, "$\\left[$": 30690, "$\\right]$": 30691, "$zeta$": 30692, "$xrightarrow$": 30693, "$ln$": 30694, "$setminus$": 30695, "$unlhd$": 30696, "$xi$": 30697, "$ll$": 30698, "$implies$": 30699, "$uparrow$": 30700, "$\\underbrace$": 30701, "$cr$": 30702, "$longleftrightarrow$": 30703, "$tau$": 30704, "$mid$": 30705, "$varphi$": 30706, "$geqslant$": 30707, "$angle$": 30708, "$longrightarrow$": 30709, "$exists$": 30710, "$inf$": 30711, "$sup$": 30712, "$\\begin{matrix}$": 30713, "$\\end{matrix}$": 30714, "$top$": 30715, "$bot$": 30716, "$simeq$": 30717, "$det$": 30718, "$\\right/$": 30719, "$\\overset$": 30720, "$emptyset$": 30721, "$\\stackrel$": 30722, "$\\left\\{$": 30723, "$\\lfloor$": 30724, "$\\rfloor$": 30725, "$\\right.$": 30726, "$binom$": 30727, "$ell$": 30728, "$sec$": 30729, "$arccos$": 30730, "$vdots$": 30731, "$ddots$": 30732, "$\\right\\}$": 30733, "$leqslant$": 30734, "$eta$": 30735, "$\\begin{smallmatrix}$": 30736, "$\\end{smallmatrix}$": 30737, "$percent$": 30738, "$oint$": 30739, "$min$": 30740, "$hbar$": 30741, "$ddot$": 30742, "$varnothing$": 30743, "$cosh$": 30744, "$downarrow$": 30745, "$gcd$": 30746, "$rightarrowtail$": 30747, "$supset$": 30748, "$\\underset$": 30749, "$bigcup$": 30750, "$preceq$": 30751, "$\\array{$": 30752, "$triangleq$": 30753, "$iota$": 30754, "$leftrightarrow$": 30755, "$arctan$": 30756, "$arcsin$": 30757, "$sinh$": 30758, "$triangle$": 30759, "$coprod$": 30760, "$neg$": 30761, "$land$": 30762, "$lor$": 30763, "$measuredangle$": 30764, "$wp$": 30765, "$backslash$": 30766, "$vartheta$": 30767, "$odot$": 30768, "$perp$": 30769, "$tanh$": 30770, "$trianglelefteq$": 30771, "$\\left\\lfloor$": 30772, "$\\right\\rfloor$": 30773, "$supseteq$": 30774, "$sign$": 30775, "$dotsc$": 30776, "$nmid$": 30777, "$smallsetminus$": 30778, "$and$": 30779, "$\\left\\lceil$": 30780, "$\\right\\rceil$": 30781, "$deg$": 30782, "$impliedby$": 30783, "$\\left<$": 30784, "$\\right>$": 30785, "$searrow$": 30786, "$limsup$": 30787, "$succeq$": 30788, "$rightharpoonup$": 30789, "$bigtriangleup$": 30790, "$sqcup$": 30791, "$subsetneq$": 30792, "$\\left \\{$": 30793, "$\\right \\}$": 30794, "$\\left |$": 30795, "$\\right |$": 30796, "$nle$": 30797, "$lnot$": 30798, "$iint$": 30799, "$hom$": 30800, "$leadsto$": 30801, "$nexists$": 30802, "$re$": 30803, "$\\begin{vmatrix}$": 30804, "$\\end{vmatrix}$": 30805, "$parallel$": 30806, "$dotsb$": 30807, "$bigwedge$": 30808, "$succ$": 30809, "$\\buildrel$": 30810, "$liminf$": 30811, "$csc$": 30812, "$wr$": 30813, "$hookrightarrow$": 30814, "$\\lbrace$": 30815, "$\\rbrace$": 30816, "$dotsm$": 30817, "$rtimes$": 30818, "$ltimes$": 30819, "$\\of$": 30820, "$updownarrow$": 30821, "$nearrow$": 30822, "$\\left \\lfloor$": 30823, "$\\right \\rfloor$": 30824, "$approxeq$": 30825, "$dashv$": 30826, "$bigcirc$": 30827, "$triangledown$": 30828, "$lcm$": 30829, "$prec$": 30830, "$propto$": 30831, "$triangleleft$": 30832, "$ncong$": 30833, "$coth$": 30834, "$longleftarrow$": 30835, "$upsilon$": 30836, "$thicksim$": 30837, "$\\left$": 30838, "$\\right$": 30839, "$bigtriangledown$": 30840, "$varliminf$": 30841, "$varlimsup$": 30842, "$atop$": 30843, "$\\overbrace$": 30844, "$\\left \\langle$": 30845, "$\\right \\rangle$": 30846, "$gets$": 30847, "$vartriangleleft$": 30848, "$iiint$": 30849, "$varinjlim$": 30850, "$varprojlim$": 30851, "$bigotimes$": 30852, "$varrho$": 30853, "$lesssim$": 30854, "$\\left \\$": 30855, "$\\right \\$": 30856, "$bigsqcup$": 30857, "$supsetneq$": 30858, "$curvearrowright$": 30859, "$dotso$": 30860, "$preccurlyeq$": 30861, "$imath$": 30862, "$omicron$": 30863, "$nrightarrow$": 30864, "$lneq$": 30865, "$upharpoonright$": 30866, "$nsubseteq$": 30867, "$enspace$": 30868, "$geqq$": 30869, "$rightrightarrows$": 30870, "$uplus$": 30871, "$owns$": 30872, "$rightsquigarrow$": 30873, "$vartriangle$": 30874, "$trianglerighteq$": 30875, "$amalg$": 30876, "$frown$": 30877, "$jmath$": 30878, "$\\left <$": 30879, "$\\right >$": 30880, "$beth$": 30881, "$sphericalangle$": 30882, "$gtrapprox$": 30883, "$lessapprox$": 30884, "$bigvee$": 30885, "$nsim$": 30886, "$swarrow$": 30887, "$asymp$": 30888, "$bigodot$": 30889, "$dotsi$": 30890, "$\\left |$": 30891, "$intop$": 30892, "$sqsupset$": 30893, "$\\left \\lceil$": 30894, "$\\right \\rceil$": 30895, "$empty$": 30896, "$rightleftharpoons$": 30897, "$\\left .$": 30898, "$smallint$": 30899, "$gtrless$": 30900, "$ngtr$": 30901, "$\\lbrack$": 30902, "$arrowvert$": 30903, "$xleftarrow$": 30904, "$\\left \\{$": 30905, "$\\right \\}$": 30906, "$vardelta$": 30907, "$looparrowright$": 30908, "$nge$": 30909, "$gneq$": 30910, "$\\left\\downarrow$": 30911, "$subsetneqq$": 30912, "$idotsint$": 30913, "$nless$": 30914, "$nprec$": 30915, "$nwarrow$": 30916, "$circlearrowright$": 30917, "$ominus$": 30918, "$\\left/$": 30919, "$gtrsim$": 30920, "$succcurlyeq$": 30921, "$sqsubseteq$": 30922, "$backsim$": 30923, "$dddot$": 30924, "$precsim$": 30925, "$divideontimes$": 30926, "$triangleright$": 30927, "$sqcap$": 30928, "$\\right )$": 30929, "$lessdot$": 30930, "$nsupseteq$": 30931, "$nleqslant$": 30932, "$\\left /$": 30933, "$varsubsetneq$": 30934, "$\\rbrack$": 30935, "$rightharpoondown$": 30936, "$\\right .$": 30937, "$succsim$": 30938, "$leftleftarrows$": 30939, "$varsupsetneq$": 30940, "$iddots$": 30941, "$vargamma$": 30942, "$sqsubset$": 30943, "$downharpoonright$": 30944, "$sqsupseteq$": 30945, "$varsigma$": 30946, "$\\right .$": 30947, "$veebar$": 30948, "$ddddot$": 30949, "$circlearrowleft$": 30950, "$leftrightarrows$": 30951, "$\\right )$": 30952, "$thickapprox$": 30953, "$npreceq$": 30954, "$biguplus$": 30955, "$supsetneqq$": 30956, "$rrightarrow$": 30957, "$smallfrown$": 30958, "$multimap$": 30959, "$subseteqq$": 30960, "$hookleftarrow$": 30961, "$rightleftarrows$": 30962, "$nparallel$": 30963, "$nsucc$": 30964, "$ggg$": 30965, "$vartriangleright$": 30966, "$varpropto$": 30967, "$\\left ($": 30968, "$\\right ]$": 30969, "$\\left [$": 30970, "$\\left \\lgroup$": 30971, "$\\right \\rgroup$": 30972, "$\\left \\$": 30973, "$leftrightsquigarrow$": 30974, "$\\right )$": 30975, "$lsh$": 30976, "$\\lgroup$": 30977, "$\\rgroup$": 30978, "$mho$": 30979, "$upharpoonleft$": 30980, "$eth$": 30981, "$nleftarrow$": 30982, "$smallsmile$": 30983, "$nleftrightarrow$": 30984, "$iiiint$": 30985, "$gneqq$": 30986, "$lneqq$": 30987, "$downdownarrows$": 30988, "$varkappa$": 30989, "$backsimeq$": 30990, "$barwedge$": 30991, "$unrhd$": 30992, "$bowtie$": 30993, "$backepsilon$": 30994, "$bracevert$": 30995, "$\\right \\rfloor$": 30996, "$\\left \\lfloor$": 30997, "$precneqq$": 30998, "$\\left\\lgroup$": 30999, "$\\right\\rgroup$": 31000, "$\\left \\langle$": 31001, "$\\lmoustache$": 31002, "$\\right \\}$": 31003, "$\\right |$": 31004, "$curlywedge$": 31005, "$\\right ]$": 31006, "$gtreqqless$": 31007, "$gtreqless$": 31008, "$ngeqslant$": 31009, "$between$": 31010, "$dotplus$": 31011, "$leftrightharpoons$": 31012, "$\\right \\rceil$": 31013, "$leftharpoondown$": 31014, "$projlim$": 31015, "$\\right )$": 31016, "$varsubsetneqq$": 31017, "$lvect$": 31018, "$\\left\\backslash$": 31019, "$supseteqq$": 31020, "$join$": 31021, "$injlim$": 31022, "$eqsim$": 31023, "$curvearrowleft$": 31024, "$succneqq$": 31025, "$\\left\\uparrow$": 31026, "$leftarrowtail$": 31027, "$upuparrows$": 31028, "$gvertneqq$": 31029, "$lvertneqq$": 31030, "$nsucceq$": 31031, "$gtrdot$": 31032, "$eqslantless$": 31033, "$\\right \\$": 31034, "$circeq$": 31035, "$\\left ($": 31036, "$curlyvee$": 31037, "$\\right \\}$": 31038, "$\\right \\rangle$": 31039, "$downharpoonleft$": 31040, "$\\left ($": 31041, "$\\right \\rangle$": 31042, "$\\left \\{$": 31043, "$\\left\\updownarrow$": 31044, "$\\right )$": 31045, "$\\right >$": 31046, "$leftharpoonup$": 31047, "$curlyeqprec$": 31048, "$\\left [$": 31049, "$eqslantgtr$": 31050, "$varxi$": 31051, "$\\right \\rfloor$": 31052, "$\\right \\}$": 31053, "$varpsi$": 31054, "$\\left .$": 31055, "$\\left |$": 31056, "$\\right /$": 31057, "$\\left ($": 31058, "$\\left \\$": 31059, "$rsh$": 31060}
|
config.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "data.arjmPWtGwzKrkmR/bert-base-uncased",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForPreTraining"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"gradient_checkpointing": false,
|
| 8 |
+
"hidden_act": "gelu",
|
| 9 |
+
"hidden_dropout_prob": 0.1,
|
| 10 |
+
"hidden_size": 768,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"intermediate_size": 3072,
|
| 13 |
+
"layer_norm_eps": 1e-12,
|
| 14 |
+
"max_position_embeddings": 512,
|
| 15 |
+
"model_type": "bert",
|
| 16 |
+
"num_attention_heads": 12,
|
| 17 |
+
"num_hidden_layers": 12,
|
| 18 |
+
"pad_token_id": 0,
|
| 19 |
+
"position_embedding_type": "absolute",
|
| 20 |
+
"torch_dtype": "float32",
|
| 21 |
+
"transformers_version": "4.9.2",
|
| 22 |
+
"type_vocab_size": 2,
|
| 23 |
+
"use_cache": true,
|
| 24 |
+
"vocab_size": 31061
|
| 25 |
+
}
|
events.out.tfevents.1631471189.blg4302.int.ets1.calculquebec.ca.240020.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2caf095b1b4f07fc75087fdecc23f9c0e9edf3201ed001624f4fc06c9a8a83e7
|
| 3 |
+
size 53784629
|
job-25031358-head.out
ADDED
|
@@ -0,0 +1,500 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
+ TRAINER=pretrain
|
| 2 |
+
+ SETUP=for-newvocab
|
| 3 |
+
++ cd pya0
|
| 4 |
+
++ pwd
|
| 5 |
+
++ git rev-parse HEAD
|
| 6 |
+
+ CODE_VER='/home/w32zhong/projects/rrg-jimmylin/w32zhong/pya0
|
| 7 |
+
8f207c0036a9f81f91e26f7ecedcfa84025ae680'
|
| 8 |
+
+ COMMAND='/var/spool/slurmd/job25031358/slurm_script pretrain for-newvocab'
|
| 9 |
+
+ EPOCHS=40
|
| 10 |
+
+ TEST_CYCLE=100
|
| 11 |
+
+ case $TRAINER-${SETUP} in
|
| 12 |
+
+ DEV_BSIZE=8
|
| 13 |
+
+ SAVE_FOLD=10
|
| 14 |
+
+ DATA_VER=arjmPWtGwzKrkmR
|
| 15 |
+
+ START_POINT=bert-base-uncased
|
| 16 |
+
+ TOK_CKPOINT=bert-tokenizer
|
| 17 |
+
+ SHARDS_LIST=shards-for-newvocab.txt
|
| 18 |
+
+ TEST_FILE=test.txt
|
| 19 |
+
+ EXTRA_DAT=mse-aops-2021-vocab.pkl
|
| 20 |
+
+ EXTRA_ARG=
|
| 21 |
+
+ DATA_DIR=data.arjmPWtGwzKrkmR
|
| 22 |
+
+ set -e
|
| 23 |
+
+ '[' '!' -e data.arjmPWtGwzKrkmR ']'
|
| 24 |
+
+ set +e
|
| 25 |
+
++ cat /var/spool/slurmd/job25031358/slurm_script
|
| 26 |
+
++ grep -Po '(?<=SBATCH --nodes=)[0-9]+'
|
| 27 |
+
+ N_NODE=4
|
| 28 |
+
++ cat /var/spool/slurmd/job25031358/slurm_script
|
| 29 |
+
++ grep -Po '(?<=SBATCH --gres=gpu:)[0-9]+'
|
| 30 |
+
+ N_GPUS=2
|
| 31 |
+
+ export NCCL_BLOCKING_WAIT=1
|
| 32 |
+
+ NCCL_BLOCKING_WAIT=1
|
| 33 |
+
+ export SLURM_ACCOUNT=def-jimmylin
|
| 34 |
+
+ SLURM_ACCOUNT=def-jimmylin
|
| 35 |
+
+ export SBATCH_ACCOUNT=def-jimmylin
|
| 36 |
+
+ SBATCH_ACCOUNT=def-jimmylin
|
| 37 |
+
+ export SALLOC_ACCOUNT=def-jimmylin
|
| 38 |
+
+ SALLOC_ACCOUNT=def-jimmylin
|
| 39 |
+
+ which srun
|
| 40 |
+
/opt/software/slurm/bin/srun
|
| 41 |
+
++ hostname
|
| 42 |
+
+ srun --unbuffered python ./pya0/utils/transformer.py pretrain data.arjmPWtGwzKrkmR/bert-base-uncased data.arjmPWtGwzKrkmR/bert-tokenizer data.arjmPWtGwzKrkmR/mse-aops-2021-vocab.pkl --test_file data.arjmPWtGwzKrkmR/test.txt --test_cycle 100 --shards_list data.arjmPWtGwzKrkmR/shards-for-newvocab.txt --cluster tcp://blg4302.int.ets1.calculquebec.ca:8912 --batch_size 64 --save_fold 10 --epochs 40
|
| 43 |
+
Loading model data.arjmPWtGwzKrkmR/bert-base-uncased...
|
| 44 |
+
{
|
| 45 |
+
"_name_or_path": "data.arjmPWtGwzKrkmR/bert-base-uncased",
|
| 46 |
+
"add_cross_attention": false,
|
| 47 |
+
"architectures": [
|
| 48 |
+
"BertForPreTraining"
|
| 49 |
+
],
|
| 50 |
+
"attention_probs_dropout_prob": 0.1,
|
| 51 |
+
"bad_words_ids": null,
|
| 52 |
+
"bos_token_id": null,
|
| 53 |
+
"chunk_size_feed_forward": 0,
|
| 54 |
+
"decoder_start_token_id": null,
|
| 55 |
+
"diversity_penalty": 0.0,
|
| 56 |
+
"do_sample": false,
|
| 57 |
+
"early_stopping": false,
|
| 58 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 59 |
+
"eos_token_id": null,
|
| 60 |
+
"finetuning_task": null,
|
| 61 |
+
"forced_bos_token_id": null,
|
| 62 |
+
"forced_eos_token_id": null,
|
| 63 |
+
"gradient_checkpointing": false,
|
| 64 |
+
"hidden_act": "gelu",
|
| 65 |
+
"hidden_dropout_prob": 0.1,
|
| 66 |
+
"hidden_size": 768,
|
| 67 |
+
"id2label": {
|
| 68 |
+
"0": "LABEL_0",
|
| 69 |
+
"1": "LABEL_1"
|
| 70 |
+
},
|
| 71 |
+
"initializer_range": 0.02,
|
| 72 |
+
"intermediate_size": 3072,
|
| 73 |
+
"is_decoder": false,
|
| 74 |
+
"is_encoder_decoder": false,
|
| 75 |
+
"label2id": {
|
| 76 |
+
"LABEL_0": 0,
|
| 77 |
+
"LABEL_1": 1
|
| 78 |
+
},
|
| 79 |
+
"layer_norm_eps": 1e-12,
|
| 80 |
+
"length_penalty": 1.0,
|
| 81 |
+
"max_length": 20,
|
| 82 |
+
"max_position_embeddings": 512,
|
| 83 |
+
"min_length": 0,
|
| 84 |
+
"model_type": "bert",
|
| 85 |
+
"no_repeat_ngram_size": 0,
|
| 86 |
+
"num_attention_heads": 12,
|
| 87 |
+
"num_beam_groups": 1,
|
| 88 |
+
"num_beams": 1,
|
| 89 |
+
"num_hidden_layers": 12,
|
| 90 |
+
"num_return_sequences": 1,
|
| 91 |
+
"output_attentions": false,
|
| 92 |
+
"output_hidden_states": false,
|
| 93 |
+
"output_scores": false,
|
| 94 |
+
"pad_token_id": 0,
|
| 95 |
+
"position_embedding_type": "absolute",
|
| 96 |
+
"prefix": null,
|
| 97 |
+
"problem_type": null,
|
| 98 |
+
"pruned_heads": {},
|
| 99 |
+
"remove_invalid_values": false,
|
| 100 |
+
"repetition_penalty": 1.0,
|
| 101 |
+
"return_dict": true,
|
| 102 |
+
"return_dict_in_generate": false,
|
| 103 |
+
"sep_token_id": null,
|
| 104 |
+
"task_specific_params": null,
|
| 105 |
+
"temperature": 1.0,
|
| 106 |
+
"tie_encoder_decoder": false,
|
| 107 |
+
"tie_word_embeddings": true,
|
| 108 |
+
"tokenizer_class": null,
|
| 109 |
+
"top_k": 50,
|
| 110 |
+
"top_p": 1.0,
|
| 111 |
+
"torch_dtype": null,
|
| 112 |
+
"torchscript": false,
|
| 113 |
+
"transformers_version": "4.9.2",
|
| 114 |
+
"type_vocab_size": 2,
|
| 115 |
+
"use_bfloat16": false,
|
| 116 |
+
"use_cache": true,
|
| 117 |
+
"vocab_size": 30522
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
Before loading new vocabulary: 30522
|
| 121 |
+
After loading new vocabulary: 31061
|
| 122 |
+
Resize model embedding and save new tokenizer ...
|
| 123 |
+
Invoke training ...
|
| 124 |
+
[caller] pretrain
|
| 125 |
+
[node#3 rank#6] Training on device cuda:0
|
| 126 |
+
[node#3 rank#6] 2 x Tesla V100-SXM2-16GB: 0%
|
| 127 |
+
[node#3 rank#7] Training on device cuda:1
|
| 128 |
+
[node#3 rank#7] 2 x Tesla V100-SXM2-16GB: 5%
|
| 129 |
+
[node#3 rank#6] Initialized process group ...
|
| 130 |
+
[node#3 rank#7] Initialized process group ...
|
| 131 |
+
Loading model data.arjmPWtGwzKrkmR/bert-base-uncased...
|
| 132 |
+
Loading model data.arjmPWtGwzKrkmR/bert-base-uncased...
|
| 133 |
+
Loading model data.arjmPWtGwzKrkmR/bert-base-uncased...
|
| 134 |
+
{
|
| 135 |
+
"_name_or_path": "data.arjmPWtGwzKrkmR/bert-base-uncased",
|
| 136 |
+
"add_cross_attention": false,
|
| 137 |
+
"architectures": [
|
| 138 |
+
"BertForPreTraining"
|
| 139 |
+
],
|
| 140 |
+
"attention_probs_dropout_prob": 0.1,
|
| 141 |
+
"bad_words_ids": null,
|
| 142 |
+
"bos_token_id": null,
|
| 143 |
+
"chunk_size_feed_forward": 0,
|
| 144 |
+
"decoder_start_token_id": null,
|
| 145 |
+
"diversity_penalty": 0.0,
|
| 146 |
+
"do_sample": false,
|
| 147 |
+
"early_stopping": false,
|
| 148 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 149 |
+
"eos_token_id": null,
|
| 150 |
+
"finetuning_task": null,
|
| 151 |
+
"forced_bos_token_id": null,
|
| 152 |
+
"forced_eos_token_id": null,
|
| 153 |
+
"gradient_checkpointing": false,
|
| 154 |
+
"hidden_act": "gelu",
|
| 155 |
+
"hidden_dropout_prob": 0.1,
|
| 156 |
+
"hidden_size": 768,
|
| 157 |
+
"id2label": {
|
| 158 |
+
"0": "LABEL_0",
|
| 159 |
+
"1": "LABEL_1"
|
| 160 |
+
},
|
| 161 |
+
"initializer_range": 0.02,
|
| 162 |
+
"intermediate_size": 3072,
|
| 163 |
+
"is_decoder": false,
|
| 164 |
+
"is_encoder_decoder": false,
|
| 165 |
+
"label2id": {
|
| 166 |
+
"LABEL_0": 0,
|
| 167 |
+
"LABEL_1": 1
|
| 168 |
+
},
|
| 169 |
+
"layer_norm_eps": 1e-12,
|
| 170 |
+
"length_penalty": 1.0,
|
| 171 |
+
"max_length": 20,
|
| 172 |
+
"max_position_embeddings": 512,
|
| 173 |
+
"min_length": 0,
|
| 174 |
+
"model_type": "bert",
|
| 175 |
+
"no_repeat_ngram_size": 0,
|
| 176 |
+
"num_attention_heads": 12,
|
| 177 |
+
"num_beam_groups": 1,
|
| 178 |
+
"num_beams": 1,
|
| 179 |
+
"num_hidden_layers": 12,
|
| 180 |
+
"num_return_sequences": 1,
|
| 181 |
+
"output_attentions": false,
|
| 182 |
+
"output_hidden_states": false,
|
| 183 |
+
"output_scores": false,
|
| 184 |
+
"pad_token_id": 0,
|
| 185 |
+
"position_embedding_type": "absolute",
|
| 186 |
+
"prefix": null,
|
| 187 |
+
"problem_type": null,
|
| 188 |
+
"pruned_heads": {},
|
| 189 |
+
"remove_invalid_values": false,
|
| 190 |
+
"repetition_penalty": 1.0,
|
| 191 |
+
"return_dict": true,
|
| 192 |
+
"return_dict_in_generate": false,
|
| 193 |
+
"sep_token_id": null,
|
| 194 |
+
"task_specific_params": null,
|
| 195 |
+
"temperature": 1.0,
|
| 196 |
+
"tie_encoder_decoder": false,
|
| 197 |
+
"tie_word_embeddings": true,
|
| 198 |
+
"tokenizer_class": null,
|
| 199 |
+
"top_k": 50,
|
| 200 |
+
"top_p": 1.0,
|
| 201 |
+
"torch_dtype": null,
|
| 202 |
+
"torchscript": false,
|
| 203 |
+
"transformers_version": "4.9.2",
|
| 204 |
+
"type_vocab_size": 2,
|
| 205 |
+
"use_bfloat16": false,
|
| 206 |
+
"use_cache": true,
|
| 207 |
+
"vocab_size": 30522
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
{
|
| 211 |
+
"_name_or_path": "data.arjmPWtGwzKrkmR/bert-base-uncased",
|
| 212 |
+
"add_cross_attention": false,
|
| 213 |
+
"architectures": [
|
| 214 |
+
"BertForPreTraining"
|
| 215 |
+
],
|
| 216 |
+
"attention_probs_dropout_prob": 0.1,
|
| 217 |
+
"bad_words_ids": null,
|
| 218 |
+
"bos_token_id": null,
|
| 219 |
+
"chunk_size_feed_forward": 0,
|
| 220 |
+
"decoder_start_token_id": null,
|
| 221 |
+
"diversity_penalty": 0.0,
|
| 222 |
+
"do_sample": false,
|
| 223 |
+
"early_stopping": false,
|
| 224 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 225 |
+
"eos_token_id": null,
|
| 226 |
+
"finetuning_task": null,
|
| 227 |
+
"forced_bos_token_id": null,
|
| 228 |
+
"forced_eos_token_id": null,
|
| 229 |
+
"gradient_checkpointing": false,
|
| 230 |
+
"hidden_act": "gelu",
|
| 231 |
+
"hidden_dropout_prob": 0.1,
|
| 232 |
+
"hidden_size": 768,
|
| 233 |
+
"id2label": {
|
| 234 |
+
"0": "LABEL_0",
|
| 235 |
+
"1": "LABEL_1"
|
| 236 |
+
},
|
| 237 |
+
"initializer_range": 0.02,
|
| 238 |
+
"intermediate_size": 3072,
|
| 239 |
+
"is_decoder": false,
|
| 240 |
+
"is_encoder_decoder": false,
|
| 241 |
+
"label2id": {
|
| 242 |
+
"LABEL_0": 0,
|
| 243 |
+
"LABEL_1": 1
|
| 244 |
+
},
|
| 245 |
+
"layer_norm_eps": 1e-12,
|
| 246 |
+
"length_penalty": 1.0,
|
| 247 |
+
"max_length": 20,
|
| 248 |
+
"max_position_embeddings": 512,
|
| 249 |
+
"min_length": 0,
|
| 250 |
+
"model_type": "bert",
|
| 251 |
+
"no_repeat_ngram_size": 0,
|
| 252 |
+
"num_attention_heads": 12,
|
| 253 |
+
"num_beam_groups": 1,
|
| 254 |
+
"num_beams": 1,
|
| 255 |
+
"num_hidden_layers": 12,
|
| 256 |
+
"num_return_sequences": 1,
|
| 257 |
+
"output_attentions": false,
|
| 258 |
+
"output_hidden_states": false,
|
| 259 |
+
"output_scores": false,
|
| 260 |
+
"pad_token_id": 0,
|
| 261 |
+
"position_embedding_type": "absolute",
|
| 262 |
+
"prefix": null,
|
| 263 |
+
"problem_type": null,
|
| 264 |
+
"pruned_heads": {},
|
| 265 |
+
"remove_invalid_values": false,
|
| 266 |
+
"repetition_penalty": 1.0,
|
| 267 |
+
"return_dict": true,
|
| 268 |
+
"return_dict_in_generate": false,
|
| 269 |
+
"sep_token_id": null,
|
| 270 |
+
"task_specific_params": null,
|
| 271 |
+
"temperature": 1.0,
|
| 272 |
+
"tie_encoder_decoder": false,
|
| 273 |
+
"tie_word_embeddings": true,
|
| 274 |
+
"tokenizer_class": null,
|
| 275 |
+
"top_k": 50,
|
| 276 |
+
"top_p": 1.0,
|
| 277 |
+
"torch_dtype": null,
|
| 278 |
+
"torchscript": false,
|
| 279 |
+
"transformers_version": "4.9.2",
|
| 280 |
+
"type_vocab_size": 2,
|
| 281 |
+
"use_bfloat16": false,
|
| 282 |
+
"use_cache": true,
|
| 283 |
+
"vocab_size": 30522
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
{
|
| 287 |
+
"_name_or_path": "data.arjmPWtGwzKrkmR/bert-base-uncased",
|
| 288 |
+
"add_cross_attention": false,
|
| 289 |
+
"architectures": [
|
| 290 |
+
"BertForPreTraining"
|
| 291 |
+
],
|
| 292 |
+
"attention_probs_dropout_prob": 0.1,
|
| 293 |
+
"bad_words_ids": null,
|
| 294 |
+
"bos_token_id": null,
|
| 295 |
+
"chunk_size_feed_forward": 0,
|
| 296 |
+
"decoder_start_token_id": null,
|
| 297 |
+
"diversity_penalty": 0.0,
|
| 298 |
+
"do_sample": false,
|
| 299 |
+
"early_stopping": false,
|
| 300 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 301 |
+
"eos_token_id": null,
|
| 302 |
+
"finetuning_task": null,
|
| 303 |
+
"forced_bos_token_id": null,
|
| 304 |
+
"forced_eos_token_id": null,
|
| 305 |
+
"gradient_checkpointing": false,
|
| 306 |
+
"hidden_act": "gelu",
|
| 307 |
+
"hidden_dropout_prob": 0.1,
|
| 308 |
+
"hidden_size": 768,
|
| 309 |
+
"id2label": {
|
| 310 |
+
"0": "LABEL_0",
|
| 311 |
+
"1": "LABEL_1"
|
| 312 |
+
},
|
| 313 |
+
"initializer_range": 0.02,
|
| 314 |
+
"intermediate_size": 3072,
|
| 315 |
+
"is_decoder": false,
|
| 316 |
+
"is_encoder_decoder": false,
|
| 317 |
+
"label2id": {
|
| 318 |
+
"LABEL_0": 0,
|
| 319 |
+
"LABEL_1": 1
|
| 320 |
+
},
|
| 321 |
+
"layer_norm_eps": 1e-12,
|
| 322 |
+
"length_penalty": 1.0,
|
| 323 |
+
"max_length": 20,
|
| 324 |
+
"max_position_embeddings": 512,
|
| 325 |
+
"min_length": 0,
|
| 326 |
+
"model_type": "bert",
|
| 327 |
+
"no_repeat_ngram_size": 0,
|
| 328 |
+
"num_attention_heads": 12,
|
| 329 |
+
"num_beam_groups": 1,
|
| 330 |
+
"num_beams": 1,
|
| 331 |
+
"num_hidden_layers": 12,
|
| 332 |
+
"num_return_sequences": 1,
|
| 333 |
+
"output_attentions": false,
|
| 334 |
+
"output_hidden_states": false,
|
| 335 |
+
"output_scores": false,
|
| 336 |
+
"pad_token_id": 0,
|
| 337 |
+
"position_embedding_type": "absolute",
|
| 338 |
+
"prefix": null,
|
| 339 |
+
"problem_type": null,
|
| 340 |
+
"pruned_heads": {},
|
| 341 |
+
"remove_invalid_values": false,
|
| 342 |
+
"repetition_penalty": 1.0,
|
| 343 |
+
"return_dict": true,
|
| 344 |
+
"return_dict_in_generate": false,
|
| 345 |
+
"sep_token_id": null,
|
| 346 |
+
"task_specific_params": null,
|
| 347 |
+
"temperature": 1.0,
|
| 348 |
+
"tie_encoder_decoder": false,
|
| 349 |
+
"tie_word_embeddings": true,
|
| 350 |
+
"tokenizer_class": null,
|
| 351 |
+
"top_k": 50,
|
| 352 |
+
"top_p": 1.0,
|
| 353 |
+
"torch_dtype": null,
|
| 354 |
+
"torchscript": false,
|
| 355 |
+
"transformers_version": "4.9.2",
|
| 356 |
+
"type_vocab_size": 2,
|
| 357 |
+
"use_bfloat16": false,
|
| 358 |
+
"use_cache": true,
|
| 359 |
+
"vocab_size": 30522
|
| 360 |
+
}
|
| 361 |
+
|
| 362 |
+
Before loading new vocabulary: 30522
|
| 363 |
+
Before loading new vocabulary: 30522
|
| 364 |
+
After loading new vocabulary: 31061
|
| 365 |
+
Resize model embedding and save new tokenizer ...
|
| 366 |
+
Before loading new vocabulary: 30522
|
| 367 |
+
After loading new vocabulary: 31061
|
| 368 |
+
Resize model embedding and save new tokenizer ...
|
| 369 |
+
After loading new vocabulary: 31061
|
| 370 |
+
Resize model embedding and save new tokenizer ...
|
| 371 |
+
Invoke training ...
|
| 372 |
+
Invoke training ...
|
| 373 |
+
Invoke training ...
|
| 374 |
+
[caller] pretrain
|
| 375 |
+
[caller] pretrain
|
| 376 |
+
[caller] pretrain
|
| 377 |
+
[node#0 rank#0] Training on device cuda:0
|
| 378 |
+
[node#2 rank#4] Training on device cuda:0
|
| 379 |
+
[node#0 rank#0] 2 x Tesla V100-SXM2-16GB: 0%
|
| 380 |
+
[node#2 rank#4] 2 x Tesla V100-SXM2-16GB: 0%
|
| 381 |
+
[node#1 rank#2] Training on device cuda:0
|
| 382 |
+
[node#1 rank#2] 2 x Tesla V100-SXM2-16GB: 0%
|
| 383 |
+
[node#0 rank#1] Training on device cuda:1
|
| 384 |
+
[node#2 rank#5] Training on device cuda:1
|
| 385 |
+
[node#0 rank#1] 2 x Tesla V100-SXM2-16GB: 2%
|
| 386 |
+
[node#2 rank#5] 2 x Tesla V100-SXM2-16GB: 2%
|
| 387 |
+
[node#1 rank#3] Training on device cuda:1
|
| 388 |
+
[node#1 rank#3] 2 x Tesla V100-SXM2-16GB: 2%
|
| 389 |
+
[node#2 rank#4] Initialized process group ...
|
| 390 |
+
[node#0 rank#0] Initialized process group ...
|
| 391 |
+
[node#1 rank#2] Initialized process group ...
|
| 392 |
+
[node#0 rank#1] Initialized process group ...
|
| 393 |
+
[node#2 rank#5] Initialized process group ...
|
| 394 |
+
[node#1 rank#3] Initialized process group ...
|
| 395 |
+
[node#0 rank#0] Enter Torch DDP.
|
| 396 |
+
[W ProcessGroupNCCL.cpp:1569] Rank 0 using best-guess GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect.Specify device_ids in barrier() to force use of a particular device.
|
| 397 |
+
[node#2 rank#4] Enter Torch DDP.
|
| 398 |
+
[node#0 rank#1] Enter Torch DDP.
|
| 399 |
+
[W ProcessGroupNCCL.cpp:1569] Rank 1 using best-guess GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect.Specify device_ids in barrier() to force use of a particular device.
|
| 400 |
+
[W ProcessGroupNCCL.cpp:1569] Rank 4 using best-guess GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect.Specify device_ids in barrier() to force use of a particular device.
|
| 401 |
+
[node#1 rank#3] Enter Torch DDP.
|
| 402 |
+
[W ProcessGroupNCCL.cpp:1569] Rank 3 using best-guess GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect.Specify device_ids in barrier() to force use of a particular device.
|
| 403 |
+
[node#3 rank#7] Enter Torch DDP.
|
| 404 |
+
[W ProcessGroupNCCL.cpp:1569] Rank 7 using best-guess GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect.Specify device_ids in barrier() to force use of a particular device.
|
| 405 |
+
[node#2 rank#5] Enter Torch DDP.
|
| 406 |
+
[W ProcessGroupNCCL.cpp:1569] Rank 5 using best-guess GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect.Specify device_ids in barrier() to force use of a particular device.
|
| 407 |
+
[node#3 rank#6] Enter Torch DDP.
|
| 408 |
+
[W ProcessGroupNCCL.cpp:1569] Rank 6 using best-guess GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect.Specify device_ids in barrier() to force use of a particular device.
|
| 409 |
+
[node#1 rank#2] Enter Torch DDP.
|
| 410 |
+
[W ProcessGroupNCCL.cpp:1569] Rank 2 using best-guess GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect.Specify device_ids in barrier() to force use of a particular device.
|
| 411 |
+
AdamW (
|
| 412 |
+
Parameter Group 0
|
| 413 |
+
betas: (0.9, 0.999)
|
| 414 |
+
correct_bias: True
|
| 415 |
+
eps: 1e-06
|
| 416 |
+
lr: 1e-06
|
| 417 |
+
weight_decay: 0.01
|
| 418 |
+
)
|
| 419 |
+
AdamW (
|
| 420 |
+
Parameter Group 0
|
| 421 |
+
betas: (0.9, 0.999)
|
| 422 |
+
correct_bias: True
|
| 423 |
+
eps: 1e-06
|
| 424 |
+
lr: 1e-06
|
| 425 |
+
weight_decay: 0.01
|
| 426 |
+
)
|
| 427 |
+
AdamW (
|
| 428 |
+
Parameter Group 0
|
| 429 |
+
betas: (0.9, 0.999)
|
| 430 |
+
correct_bias: True
|
| 431 |
+
eps: 1e-06
|
| 432 |
+
lr: 1e-06
|
| 433 |
+
weight_decay: 0.01
|
| 434 |
+
)
|
| 435 |
+
AdamW (
|
| 436 |
+
Parameter Group 0
|
| 437 |
+
betas: (0.9, 0.999)
|
| 438 |
+
correct_bias: True
|
| 439 |
+
eps: 1e-06
|
| 440 |
+
lr: 1e-06
|
| 441 |
+
weight_decay: 0.01
|
| 442 |
+
)
|
| 443 |
+
AdamW (
|
| 444 |
+
Parameter Group 0
|
| 445 |
+
betas: (0.9, 0.999)
|
| 446 |
+
correct_bias: True
|
| 447 |
+
eps: 1e-06
|
| 448 |
+
lr: 1e-06
|
| 449 |
+
weight_decay: 0.01
|
| 450 |
+
)
|
| 451 |
+
[node#3 rank#7] Shards: ['data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.6632730', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7074912', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7517094', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7959276', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.8401458', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.8843640']
|
| 452 |
+
[node#3 rank#7] Start training at: (0, 0, -1)
|
| 453 |
+
AdamW (
|
| 454 |
+
Parameter Group 0
|
| 455 |
+
betas: (0.9, 0.999)
|
| 456 |
+
correct_bias: True
|
| 457 |
+
eps: 1e-06
|
| 458 |
+
lr: 1e-06
|
| 459 |
+
weight_decay: 0.01
|
| 460 |
+
)
|
| 461 |
+
Loading test data: data.arjmPWtGwzKrkmR/test.txt (bsize=8)
|
| 462 |
+
[node#3 rank#6] Shards: ['data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.6632730', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7074912', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7517094', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7959276', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.8401458', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.8843640']
|
| 463 |
+
[node#3 rank#6] Start training at: (0, 0, -1)
|
| 464 |
+
[node#3 rank#7] Loading shard data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.6632730 ...
|
| 465 |
+
AdamW (
|
| 466 |
+
Parameter Group 0
|
| 467 |
+
betas: (0.9, 0.999)
|
| 468 |
+
correct_bias: True
|
| 469 |
+
eps: 1e-06
|
| 470 |
+
lr: 1e-06
|
| 471 |
+
weight_decay: 0.01
|
| 472 |
+
)
|
| 473 |
+
AdamW (
|
| 474 |
+
Parameter Group 0
|
| 475 |
+
betas: (0.9, 0.999)
|
| 476 |
+
correct_bias: True
|
| 477 |
+
eps: 1e-06
|
| 478 |
+
lr: 1e-06
|
| 479 |
+
weight_decay: 0.01
|
| 480 |
+
)
|
| 481 |
+
[node#1 rank#2] Shards: ['data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.6632730', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7074912', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7517094', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7959276', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.8401458', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.8843640']
|
| 482 |
+
[node#1 rank#2] Start training at: (0, 0, -1)
|
| 483 |
+
Loading test data: data.arjmPWtGwzKrkmR/test.txt (bsize=8)
|
| 484 |
+
[node#3 rank#6] Loading shard data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.6632730 ...
|
| 485 |
+
[node#1 rank#3] Shards: ['data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.6632730', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7074912', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7517094', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7959276', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.8401458', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.8843640']
|
| 486 |
+
[node#1 rank#3] Start training at: (0, 0, -1)
|
| 487 |
+
[node#2 rank#4] Shards: ['data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.6632730', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7074912', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7517094', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7959276', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.8401458', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.8843640']
|
| 488 |
+
[node#2 rank#4] Start training at: (0, 0, -1)
|
| 489 |
+
[node#2 rank#5] Shards: ['data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.6632730', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7074912', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7517094', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7959276', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.8401458', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.8843640']
|
| 490 |
+
[node#2 rank#5] Start training at: (0, 0, -1)
|
| 491 |
+
Loading test data: data.arjmPWtGwzKrkmR/test.txt (bsize=8)
|
| 492 |
+
Loading test data: data.arjmPWtGwzKrkmR/test.txt (bsize=8)
|
| 493 |
+
[node#2 rank#4] Loading shard data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.6632730 ...
|
| 494 |
+
[node#2 rank#5] Loading shard data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.6632730 ...
|
| 495 |
+
Loading test data: data.arjmPWtGwzKrkmR/test.txt (bsize=8)
|
| 496 |
+
Loading test data: data.arjmPWtGwzKrkmR/test.txt (bsize=8)
|
| 497 |
+
[node#1 rank#3] Loading shard data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.6632730 ...
|
| 498 |
+
[node#1 rank#2] Loading shard data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.6632730 ...
|
| 499 |
+
[node#0 rank#1] Shards: ['data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.6632730', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7074912', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7517094', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.7959276', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.8401458', 'data.arjmPWtGwzKrkmR/mse-aops-2021-data.pkl.pairs.8843640']
|
| 500 |
+
[node#0 rank#1] Start training at: (0, 0, -1)
|
job-25031358-tail.out
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e609127390ca34a2bdc307d1f66fc68b056d1550973adbee02979df8e091162e
|
| 3 |
+
size 442169891
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "bert-base-uncased", "tokenizer_class": "BertTokenizer"}
|
vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|