{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "([bos])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "([eos])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "([unk])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "([pad])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "([mask])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Replace", "pattern": { "String": "``" }, "content": "\"" }, { "type": "Replace", "pattern": { "String": "''" }, "content": "\"" }, { "type": "Lowercase" } ] }, "pre_tokenizer": { "type": "Metaspace", "replacement": "▁", "add_prefix_space": true }, "post_processor": { "type": "TemplateProcessing", "single": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "([eos])", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "([eos])", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "([eos])", "type_id": 1 } } ], "special_tokens": { "([bos])": { "id": "([bos])", "ids": [ 0 ], "tokens": [ "([bos])" ] }, "([eos])": { "id": "([eos])", "ids": [ 1 ], "tokens": [ "([eos])" ] } } }, "decoder": { "type": "Metaspace", "replacement": "▁", "add_prefix_space": true }, "model": { "type": "Unigram", "unk_id": 2, "vocab": [ [ "([bos])", 0.0 ], [ "([eos])", 0.0 ], [ "([unk])", 0.0 ], [ "([pad])", 0.0 ], [ "([mask])", 0.0 ], [ "▁", -2.6754905386285905 ], [ "s", -2.8768613408851564 ], [ "e", -3.2762470735395457 ], [ "t", -3.346505232415412 ], [ "▁the", -3.4613494666908995 ], [ "d", -3.90460894182274 ], [ "a", -3.988983277321309 ], [ "▁a", -4.1043433268201825 ], [ "m", -4.130835343792769 ], [ "y", -4.187992643366066 ], [ "n", -4.217156675408131 ], [ "i", -4.232281987977052 ], [ "ed", -4.311072875814168 ], [ "ing", -4.317765051829164 ], [ "▁to", -4.331294592787134 ], [ "o", -4.346337287814527 ], [ "c", -4.3537456912204755 ], [ "▁of", -4.408302422120218 ], [ "▁in", -4.439226546242843 ], [ "and", -4.447707206682657 ], [ "p", -4.557523898205941 ], [ "u", -4.599969020077225 ], [ "g", -4.644611077920461 ], [ "r", -4.706481270700311 ], [ "l", -4.713380466456869 ], [ "er", -4.74899222065031 ], [ "al", -4.832948319811666 ], [ "▁is", -4.870481905541696 ], [ "in", -4.893132701006505 ], [ "k", -4.974102278368674 ], [ "or", -5.0025052704507935 ], [ "b", -5.035860350244143 ], [ "▁be", -5.071977878617131 ], [ "ly", -5.110970671924331 ], [ "en", -5.176639562119152 ], [ "it", -5.2223464220674005 ], [ "v", -5.230571121824163 ], [ "le", -5.261011559598946 ], [ "ar", -5.28733580866414 ], [ "ch", -5.331946977557094 ], [ "st", -5.34591745748041 ], [ "an", -5.355807230421707 ], [ "▁f", -5.391744389389056 ], [ "ic", -5.423546538328285 ], [ "▁for", -5.447670532180611 ], [ "f", -5.48303699228936 ], [ "w", -5.5198467796805755 ], [ "ur", -5.530880383304706 ], [ "es", -5.5501989639084055 ], [ "on", -5.557399947373246 ], [ "▁re", -5.567545311958694 ], [ "▁are", -5.573269832805648 ], [ "▁on", -5.59021385720431 ], [ "th", -5.627197447181516 ], [ "▁h", -5.644403198021651 ], [ "▁tha", -5.647913682947817 ], [ "▁c", -5.6522531999839325 ], [ "h", -5.6671890386742785 ], [ "▁w", -5.688082766473154 ], [ "re", -5.722153275438761 ], [ "ir", -5.7903344504118595 ], [ "▁b", -5.801750081259955 ], [ "▁with", -5.819491368215889 ], [ "ro", -5.822405765770177 ], [ "ri", -5.854370618582819 ], [ "ation", -5.855145426489802 ], [ "0", -5.8872829347974 ], [ "▁was", -5.894023413092957 ], [ "ent", -5.913173819895014 ], [ "▁or", -5.929829545288575 ], [ "ce", -5.94118778631942 ], [ "▁an", -5.942275864998134 ], [ "▁st", -5.959845626706876 ], [ "▁p", -5.979182853147907 ], [ "▁co", -5.99464444437195 ], [ "tion", -6.018005629870164 ], [ "▁de", -6.0197214334498375 ], [ "at", -6.023872247567192 ], [ "▁it", -6.0468006319666525 ], [ "il", -6.071419632524207 ], [ "▁su", -6.087265345738892 ], [ "▁ma", -6.0955072307599565 ], [ "▁as", -6.096445849090732 ], [ "▁fro", -6.129354549270313 ], [ "▁you", -6.131897252589564 ], [ "ol", -6.1390576793196425 ], [ "▁con", -6.145755126990158 ], [ "la", -6.146651426274379 ], [ "▁can", -6.149043410740569 ], [ "▁he", -6.152763862900574 ], [ "▁hav", -6.156435802908428 ], [ "▁1", -6.160132051411401 ], [ "is", -6.195569695401377 ], [ "▁by", -6.212529629983307 ], [ "lo", -6.220446873243513 ], [ "ll", -6.228179292983937 ], [ "ies", -6.245665905271688 ], [ "id", -6.260508224854381 ], [ "▁not", -6.267819101117954 ], [ "if", -6.270644329586759 ], [ "un", -6.275506263031922 ], [ "▁thi", -6.2881655764394235 ], [ "▁di", -6.329588431001757 ], [ "us", -6.333327209962254 ], [ "ra", -6.340865501780096 ], [ "▁at", -6.355182350030107 ], [ "j", -6.357964711000992 ], [ "ve", -6.372531519329924 ], [ "om", -6.37496567318558 ], [ "ate", -6.3753246003687005 ], [ "el", -6.390196095937399 ], [ "ion", -6.396975087246941 ], [ "▁2", -6.400755836647712 ], [ "▁we", -6.412240561664114 ], [ "▁g", -6.416027622473206 ], [ "z", -6.43633541535936 ], [ "▁sa", -6.43850552769723 ], [ "ver", -6.447887731618186 ], [ "▁whi", -6.457883910972052 ], [ "x", -6.459583214795332 ], [ "▁pa", -6.503480316267584 ], [ "ive", -6.5110947788952975 ], [ "ight", -6.536365914035072 ], [ "ers", -6.5428103154111135 ], [ "▁bo", -6.546150816046826 ], [ "▁no", -6.547088192255935 ], [ "ment", -6.562479873766899 ], [ "▁pro", -6.571064898982521 ], [ "act", -6.583990559778682 ], [ "▁has", -6.603654202468725 ], [ "out", -6.60580960365786 ], [ "▁mo", -6.612206063842681 ], [ "▁man", -6.641204427471876 ], [ "age", -6.641280763657149 ], [ "▁will", -6.64270247271019 ], [ "▁me", -6.647724830351693 ], [ "4", -6.651679234308922 ], [ "ant", -6.653057213377799 ], [ "▁sp", -6.653170671243362 ], [ "▁us", -6.660440017314415 ], [ "5", -6.669223493954846 ], [ "▁k", -6.669597778381468 ], [ "▁some", -6.6818801214084935 ], [ "were", -6.685358326258509 ], [ "day", -6.685810790026934 ], [ "est", -6.692698060575062 ], [ "ul", -6.710575311022998 ], [ "ph", -6.73056730249125 ], [ "▁ex", -6.733336219158555 ], [ "▁fa", -6.765091874615061 ], [ "▁so", -6.767286242386988 ], [ "ard", -6.772605838493793 ], [ "▁un", -6.787572388877607 ], [ "▁do", -6.795074557643034 ], [ "▁other", -6.7956782657022545 ], [ "▁but", -6.8025729087235485 ], [ "ther", -6.803231620372917 ], [ "ian", -6.812902667758519 ], [ "▁one", -6.821076061857491 ], [ "▁more", -6.8266493698278925 ], [ "6", -6.82879636799627 ], [ "ous", -6.855962306912703 ], [ "▁whe", -6.859091467805891 ], [ "able", -6.8654555236076575 ], [ "▁most", -6.865507428780849 ], [ "▁all", -6.868332909576947 ], [ "ca", -6.868732115162404 ], [ "▁mi", -6.871663607421129 ], [ "▁car", -6.872038781117334 ], [ "▁en", -6.877446929350223 ], [ "per", -6.892167759263063 ], [ "▁time", -6.8931312430041185 ], [ "▁vi", -6.895209208386817 ], [ "had", -6.898533648058522 ], [ "▁go", -6.902336913332548 ], [ "lso", -6.913439467216554 ], [ "▁sh", -6.919449248503678 ], [ "▁li", -6.922560430255311 ], [ "▁countr", -6.923075365505835 ], [ "▁ra", -6.924585801451686 ], [ "▁br", -6.931064678460284 ], [ "▁ab", -6.931940367854744 ], [ "ia", -6.937806621480712 ], [ "ance", -6.957346174263031 ], [ "8", -6.961832959098544 ], [ "▁3", -6.968100343799258 ], [ "▁there", -6.9733914148350085 ], [ "ish", -6.986137594465574 ], [ "▁even", -7.002799148099052 ], [ "▁people", -7.02716205152031 ], [ "▁per", -7.0330618605904665 ], [ "way", -7.03633517131352 ], [ "ern", -7.042479278911798 ], [ "▁pre", -7.0523064460866784 ], [ "ff", -7.053031071467035 ], [ "▁your", -7.066913374732923 ], [ "▁new", -7.0793013547195525 ], [ "9", -7.085883866389088 ], [ "ill", -7.089598704000823 ], [ "▁over", -7.098805832816234 ], [ "▁after", -7.115356643253071 ], [ "▁tra", -7.120228788920384 ], [ "low", -7.122220040050001 ], [ "▁comp", -7.133500815815238 ], [ "▁ba", -7.141131224024946 ], [ "ical", -7.1422260237143265 ], [ "round", -7.1435233350247245 ], [ "7", -7.152886984152966 ], [ "▁year", -7.153144193143549 ], [ "▁may", -7.159125415859334 ], [ "▁na", -7.161008162022526 ], [ "one", -7.184878065317987 ], [ "▁al", -7.195461984655057 ], [ "um", -7.209006967663773 ], [ "▁pri", -7.210069850711059 ], [ "▁world", -7.212023222385909 ], [ "▁who", -7.223014137024571 ], [ "ould", -7.226760603979994 ], [ "▁out", -7.231998550417056 ], [ "▁its", -7.233405561461712 ], [ "▁up", -7.285142155426511 ], [ "▁than", -7.299137896625927 ], [ "▁state", -7.30645541347183 ], [ "ence", -7.318807589581895 ], [ "long", -7.332923092470053 ], [ "▁his", -7.3354486738370595 ], [ "ize", -7.335587746438243 ], [ "ose", -7.339855014717346 ], [ "▁know", -7.341930003063956 ], [ "ry", -7.344186782226564 ], [ "2", -7.344207514829643 ], [ "▁imp", -7.353859464596262 ], [ "ary", -7.354825893119824 ], [ "▁often", -7.358854125859942 ], [ "▁call", -7.378671801439579 ], [ "▁into", -7.386238978021677 ], [ "▁part", -7.392001248006106 ], [ "ture", -7.395700798454083 ], [ "▁how", -7.414132628968153 ], [ "que", -7.4158513488173 ], [ "where", -7.416644301772499 ], [ "▁19", -7.416822207519889 ], [ "1", -7.434230018742243 ], [ "▁travel", -7.451833886454098 ], [ "▁dur", -7.487770219521905 ], [ "▁mon", -7.48847200394581 ], [ "▁work", -7.491585291065162 ], [ "cause", -7.4924841199173855 ], [ "▁war", -7.494757093128933 ], [ "inter", -7.500819628415845 ], [ "▁like", -7.505488775076321 ], [ "thing", -7.510805171237376 ], [ "▁comm", -7.521731365223375 ], [ "▁cit", -7.524839744064275 ], [ "▁sai", -7.525578186853979 ], [ "read", -7.527557697032419 ], [ "▁different", -7.5464568591042385 ], [ "▁get", -7.552169862062854 ], [ "ock", -7.555306657435667 ], [ "▁show", -7.575335097438488 ], [ "▁north", -7.575620423647008 ], [ "▁make", -7.577043786702754 ], [ "▁tri", -7.590393027908454 ], [ "though", -7.593003994539069 ], [ "▁jo", -7.617850059876274 ], [ "ign", -7.621768802470928 ], [ "▁fre", -7.625386647341769 ], [ "▁see", -7.6336479886677315 ], [ "▁app", -7.639523684019808 ], [ "▁place", -7.651093729477778 ], [ "▁water", -7.651415059507122 ], [ "▁report", -7.666922795581368 ], [ "▁just", -7.683062641491382 ], [ "▁should", -7.684631124020186 ], [ "▁cha", -7.686782915070514 ], [ "side", -7.706471569720542 ], [ "ness", -7.708460850991384 ], [ "▁south", -7.716338337987308 ], [ "▁10", -7.7273771012898225 ], [ "▁includ", -7.733039602026582 ], [ "▁back", -7.733220730712199 ], [ "▁qu", -7.755042830237189 ], [ "▁par", -7.761722791424332 ], [ "▁small", -7.767843049176307 ], [ "▁need", -7.769123438158099 ], [ "3", -7.771541622268048 ], [ "tter", -7.772352757891329 ], [ "ough", -7.782401384709099 ], [ "▁take", -7.803964097405506 ], [ "▁require", -7.822395117221289 ], [ "▁base", -7.823680575156837 ], [ "▁gra", -7.825064277345991 ], [ "▁through", -7.841251976339118 ], [ "▁high", -7.841254254031162 ], [ "▁visit", -7.861379901015909 ], [ "▁home", -7.862319183680619 ], [ "cient", -7.8663293686340445 ], [ "▁ski", -7.872481670538821 ], [ "▁island", -7.882319522995838 ], [ "▁large", -7.900124903843213 ], [ "▁down", -7.900275079303846 ], [ "▁child", -7.900295796047779 ], [ "▁near", -7.9006274881876095 ], [ "each", -7.904823960369406 ], [ "▁number", -7.92049657311796 ], [ "ship", -7.923714185081133 ], [ "special", -7.924246082451086 ], [ "▁found", -7.941336564660717 ], [ "▁sea", -7.954838574459147 ], [ "▁again", -7.962674285284644 ], [ "▁learn", -7.962691878072893 ], [ "ember", -7.965884834890247 ], [ "▁vari", -7.96616597834795 ], [ "▁animal", -7.984363992669607 ], [ "▁what", -7.984502416198689 ], [ "▁europe", -8.006567891721467 ], [ "▁200", -8.00660766861835 ], [ "000000", -8.012236019613125 ], [ "mark", -8.01276901828499 ], [ "sure", -8.015138086899906 ], [ "▁adv", -8.023621444584538 ], [ "▁whil", -8.02780271246084 ], [ "▁chi", -8.045482944578183 ], [ "▁buil", -8.052815943522955 ], [ "▁every", -8.056538487431741 ], [ "▁va", -8.06777735608203 ], [ "▁become", -8.076393297026822 ], [ "▁local", -8.076412720519006 ], [ "▁stud", -8.07837962249372 ], [ "▁due", -8.078849866853641 ], [ "▁gree", -8.082887671546896 ], [ "▁centur", -8.100942155064343 ], [ "▁play", -8.101156837441584 ], [ "cross", -8.102046351520748 ], [ "▁government", -8.125750652904102 ], [ "▁system", -8.125751292965452 ], [ "▁point", -8.126694846256854 ], [ "▁trans", -8.128116666544686 ], [ "▁name", -8.13730426679406 ], [ "▁german", -8.151397109598614 ], [ "▁culture", -8.151404168932643 ], [ "▁famil", -8.15169431433106 ], [ "▁histor", -8.151992571385806 ], [ "▁help", -8.152379382465845 ], [ "▁find", -8.15399509736029 ], [ "▁run", -8.15551819933091 ], [ "▁medi", -8.175453324820076 ], [ "▁general", -8.177711046639622 ], [ "▁usual", -8.178143536295165 ], [ "▁lead", -8.17819966076631 ], [ "▁offer", -8.180232661192168 ], [ "came", -8.183643902583706 ], [ "qui", -8.192813528327466 ], [ "imate", -8.204585093979773 ], [ "▁provide", -8.204769066637121 ], [ "▁start", -8.206828802738865 ], [ "▁possibl", -8.23251237309725 ], [ "▁temple", -8.232512443222566 ], [ "▁develop", -8.23251253510426 ], [ "▁before", -8.232542372986714 ], [ "▁sever", -8.233431940537834 ], [ "▁game", -8.234672632014709 ], [ "▁human", -8.261150107977098 ], [ "▁consider", -8.261160080863537 ], [ "▁change", -8.261187464522354 ], [ "▁book", -8.261600372522377 ], [ "▁1000", -8.289718237006165 ], [ "▁tourist", -8.29054215262876 ], [ "▁great", -8.290624738675088 ], [ "▁region", -8.290771793199044 ], [ "▁protest", -8.290809821436957 ], [ "▁limit", -8.290876449378203 ], [ "▁under", -8.291576583902883 ], [ "▁driv", -8.291901980259404 ], [ "▁cru", -8.29526860346621 ], [ "▁relat", -8.315627957281512 ], [ "▁language", -8.320798343228995 ], [ "▁201", -8.320810101005854 ], [ "▁remain", -8.320835267121115 ], [ "▁receive", -8.32087919331787 ], [ "▁team", -8.320905438873691 ], [ "▁close", -8.321788682484456 ], [ "▁follow", -8.352087907661254 ], [ "▁major", -8.352166130913496 ], [ "▁much", -8.352275564219338 ], [ "▁wild", -8.352831893439728 ], [ "ject", -8.352943963542865 ], [ "▁case", -8.354836760378483 ], [ "▁official", -8.384306537193822 ], [ "▁group", -8.384309677325607 ], [ "▁africa", -8.384310670398985 ], [ "▁season", -8.38466131549998 ], [ "▁service", -8.417640029535818 ], [ "▁believe", -8.417644279258813 ], [ "▁look", -8.417696460056247 ], [ "▁camp", -8.4177560776734 ], [ "▁own", -8.417875270619882 ], [ "break", -8.417973529893512 ], [ "▁affect", -8.452123796949778 ], [ "▁sometime", -8.452127443718524 ], [ "▁america", -8.452135735357338 ], [ "▁charge", -8.45219566004275 ], [ "▁understand", -8.452216747124485 ], [ "▁type", -8.452232925203674 ], [ "▁class", -8.452279844492605 ], [ "▁earth", -8.452354317519077 ], [ "▁communi", -8.45314714922437 ], [ "▁locat", -8.458516662039814 ], [ "▁problem", -8.487840831803812 ], [ "▁public", -8.52487386512437 ], [ "▁particular", -8.52487392465013 ], [ "▁success", -8.52487575044468 ], [ "▁popular", -8.563335769640975 ], [ "▁japan", -8.563335928219438 ], [ "▁transport", -8.56333935408694 ], [ "▁snow", -8.563350743467298 ], [ "▁legal", -8.56349422889885 ], [ "company", -8.567823788313378 ], [ "▁australia", -8.603335377698876 ], [ "▁experience", -8.60333543690594 ], [ "▁effect", -8.603335572481557 ], [ "▁expect", -8.603336545491683 ], [ "▁religio", -8.60333719794059 ], [ "▁airline", -8.603337729717676 ], [ "▁food", -8.603423880765625 ], [ "▁process", -8.603434066309912 ], [ "▁franc", -8.607137440189035 ], [ "▁according", -8.645002056827511 ], [ "▁international", -8.645002161206236 ], [ "▁authorit", -8.645002186529833 ], [ "▁econom", -8.645003467085711 ], [ "▁check", -8.645008609181772 ], [ "mission", -8.652738598036581 ], [ "▁olympic", -8.688480321150827 ], [ "▁discover", -8.688481280484586 ], [ "▁direct", -8.68862007392972 ], [ "▁arriv", -8.68883024075118 ], [ "strict", -8.689662128023302 ], [ "%", -8.733934831176956 ], [ "▁window", -8.733935012824286 ], [ "▁support", -8.733935222634948 ], [ "▁document", -8.733936433415515 ], [ "▁school", -8.7339383128885 ], [ "▁univers", -8.733952912966306 ], [ "▁plann", -8.733959359687297 ], [ "▁mountain", -8.781553962524354 ], [ "▁attack", -8.78155483428725 ], [ "▁addition", -8.781556055460403 ], [ "▁education", -8.781556487999397 ], [ "▁crash", -8.781565077101176 ], [ "abilit", -8.782467496776816 ], [ "▁similar", -8.831553958948293 ], [ "▁regular", -8.831555814320183 ], [ "▁involve", -8.831558534910847 ], [ "▁infect", -8.831583753401237 ], [ "▁photo", -8.831590339505494 ], [ "▁establish", -8.884185458496821 ], [ "▁independen", -8.884185473898746 ], [ "▁physical", -8.884185942057826 ], [ "▁current", -8.884189149406275 ], [ "00000", -8.92697290848362 ], [ "▁organization", -8.939741013377946 ], [ "▁behavior", -8.93974101496351 ], [ "▁investigat", -8.9397411668559 ], [ "▁television", -8.998564542859848 ], [ "▁republic", -8.99856454364473 ], [ "▁potential", -8.998564656926455 ], [ "▁example", -8.998564715546378 ], [ "▁original", -8.99856478139713 ], [ "▁population", -8.998564898026078 ], [ "▁happen", -8.998564920034934 ], [ "▁website", -8.998566502024065 ], [ "▁individual", -9.061064542741311 ], [ "▁surviv", -9.061074061068329 ], [ "▁simpl", -9.06108516492456 ], [ "▁neighbor", -9.127731209385104 ], [ "▁observ", -9.12773163177753 ], [ "▁associat", -9.127733727827064 ], [ "▁earthquake", -9.199159780805967 ], [ "$", -9.359416191062518 ], [ ".", -9.359416191062522 ], [ "▁network", -9.359416194093198 ], [ "▁agricultur", -9.359428904038964 ], [ "▁geograph", -9.45032528230153 ], [ "▁legislati", -9.450326304956086 ], [ "]", -10.095960202659114 ], [ "[", -10.095960202659116 ], [ "-", -10.295960202619158 ], [ "¢", -11.37929353599245 ], [ "​", -11.37929353599245 ], [ "£", -12.378993535952503 ], [ "q", -12.379093535952505 ], [ ")", -12.379193535952504 ], [ "(", -12.379293535952502 ], [ "€", -12.379293535952502 ] ] } }