{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Replace", "pattern": { "String": "``" }, "content": "\"" }, { "type": "Replace", "pattern": { "String": "''" }, "content": "\"" }, { "type": "Lowercase" } ] }, "pre_tokenizer": { "type": "Metaspace", "replacement": "▁", "add_prefix_space": true }, "post_processor": { "type": "TemplateProcessing", "single": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "", "type_id": 1 } } ], "special_tokens": { "": { "id": "", "ids": [ 1 ], "tokens": [ "" ] }, "": { "id": "", "ids": [ 0 ], "tokens": [ "" ] } } }, "decoder": { "type": "Metaspace", "replacement": "▁", "add_prefix_space": true }, "model": { "type": "Unigram", "unk_id": 2, "vocab": [ [ "", 0.0 ], [ "", 0.0 ], [ "", 0.0 ], [ "", 0.0 ], [ "", 0.0 ], [ "▁", -2.365659253373355 ], [ "e", -2.7726274257071637 ], [ "s", -2.809259472670931 ], [ "t", -2.9379066777594325 ], [ "a", -3.0234880395661925 ], [ "i", -3.130101696517535 ], [ "r", -3.316708085002908 ], [ "o", -3.489081473090966 ], [ "n", -3.6166233466724016 ], [ "d", -3.662076718281835 ], [ "▁the", -3.83627519078912 ], [ "\n", -3.853460409536659 ], [ "l", -3.88292372065564 ], [ "c", -4.143267028826475 ], [ "m", -4.228727291757144 ], [ "u", -4.335524745161635 ], [ "p", -4.359575101224138 ], [ "▁to", -4.386513688410018 ], [ "ing", -4.483100337757939 ], [ "▁and", -4.520970283112547 ], [ "f", -4.525275602775812 ], [ "▁of", -4.57605271191775 ], [ "y", -4.58382387353196 ], [ "▁in", -4.740397653766353 ], [ "g", -4.791754116540284 ], [ "h", -4.836474590087402 ], [ "b", -4.909411232248008 ], [ "▁that", -5.199912115593687 ], [ "k", -5.263461955826331 ], [ "w", -5.281236984402881 ], [ "▁is", -5.299636845493669 ], [ "▁be", -5.419702910541638 ], [ "in", -5.435910346470905 ], [ "▁for", -5.512356650709723 ], [ "v", -5.604824905017557 ], [ "▁we", -5.610244633591739 ], [ "ly", -5.626062101437663 ], [ "▁you", -5.743884888035879 ], [ "▁on", -5.812146705563078 ], [ "▁he", -5.884272913006649 ], [ "▁are", -5.911223094826363 ], [ "▁as", -5.929833784333798 ], [ "▁was", -5.944656786916925 ], [ "▁with", -5.94662811898028 ], [ "ve", -6.029713712838159 ], [ "▁have", -6.056062667440747 ], [ "▁but", -6.078495912607519 ], [ "▁do", -6.120033501363489 ], [ "▁this", -6.135130124131251 ], [ "ur", -6.159085253292046 ], [ "▁co", -6.1634956258135425 ], [ "▁not", -6.1688698819610295 ], [ "ation", -6.174891736114816 ], [ "▁they", -6.230896897497015 ], [ "us", -6.263880501182529 ], [ "▁one", -6.280948860315332 ], [ "'s", -6.287063313479933 ], [ "▁or", -6.295727284839591 ], [ "▁ma", -6.33284022033607 ], [ "▁me", -6.342682111290012 ], [ "▁can", -6.347928769198857 ], [ "▁an", -6.386687084496504 ], [ "▁con", -6.411591977758151 ], [ "ll", -6.421978033658197 ], [ "▁pa", -6.48576747304849 ], [ "ck", -6.498920060222396 ], [ "▁from", -6.510430181366692 ], [ "▁said", -6.5497990597464995 ], [ "▁all", -6.5597607202216235 ], [ "▁ch", -6.56905588694808 ], [ "▁there", -6.5830177262538445 ], [ "▁what", -6.586112233691299 ], [ "▁ca", -6.617227466784243 ], [ "ide", -6.630277697578046 ], [ "'", -6.636383868446497 ], [ "▁lo", -6.658399655498306 ], [ "▁ex", -6.680873400433887 ], [ "▁go", -6.684875077627906 ], [ "▁ba", -6.69101369979588 ], [ "age", -6.692079993714325 ], [ "▁by", -6.697920339970182 ], [ "▁about", -6.705057222090154 ], [ "▁more", -6.712459504370525 ], [ "▁no", -6.724089235660829 ], [ "▁who", -6.728620204290847 ], [ "▁my", -6.738463579994518 ], [ "▁out", -6.74381208679061 ], [ "men", -6.749756175437199 ], [ "▁will", -6.750296473528449 ], [ "▁people", -6.752779647842177 ], [ "▁their", -6.762497277425371 ], [ "▁pro", -6.778461406812822 ], [ "rea", -6.784288329157706 ], [ "j", -6.7882011736527055 ], [ "one", -6.7947783162153215 ], [ "ive", -6.8137808682097365 ], [ "▁up", -6.820641238110765 ], [ "▁th", -6.828317656094258 ], [ "per", -6.830780397681881 ], [ "▁when", -6.845789034417841 ], [ "▁like", -6.846099446872438 ], [ "▁has", -6.858481313045974 ], [ "▁two", -6.8754035721673645 ], [ "▁her", -6.877864882622244 ], [ "ure", -6.890885409969645 ], [ "x", -6.898140163448586 ], [ "▁some", -6.901803863061467 ], [ "▁his", -6.903905558474841 ], [ "▁time", -6.907975494420091 ], [ "les", -6.940601705628117 ], [ "▁she", -6.9507131763320835 ], [ "▁sh", -6.952521338654096 ], [ "red", -6.958620516243254 ], [ "▁see", -6.9822760008455145 ], [ "▁would", -6.983175910632587 ], [ "▁get", -6.987675091372429 ], [ "▁ha", -6.988701633352527 ], [ "▁our", -6.994954275263092 ], [ "▁pre", -7.001261302546556 ], [ "▁had", -7.008694902540036 ], [ "▁were", -7.014572559915726 ], [ "▁just", -7.015066325981129 ], [ "▁thousand", -7.027882847809211 ], [ "end", -7.032569076938103 ], [ "▁cl", -7.033247703700155 ], [ "z", -7.036404857337109 ], [ "able", -7.045084509819359 ], [ "ight", -7.049731749195805 ], [ "▁it's", -7.0518053707132005 ], [ "▁how", -7.058726391933215 ], [ "▁hundred", -7.059674179353376 ], [ "▁comp", -7.083542356209991 ], [ "▁dis", -7.093914212996813 ], [ "▁your", -7.117440901089488 ], [ "▁than", -7.128868371432702 ], [ "▁which", -7.129356471133214 ], [ "▁work", -7.129359951395434 ], [ "▁other", -7.135976300026186 ], [ "▁say", -7.177611083373817 ], [ "▁vi", -7.181149894078867 ], [ "ver", -7.191963860463096 ], [ "▁cr", -7.193242581900805 ], [ "▁know", -7.196656171543751 ], [ "▁new", -7.200727503764986 ], [ "ther", -7.204924714404292 ], [ "▁been", -7.205833241748348 ], [ "ach", -7.208229328292404 ], [ "ance", -7.208401352102113 ], [ "com", -7.2587815267099085 ], [ "ical", -7.278190190681167 ], [ "▁sta", -7.2962801930271795 ], [ "▁make", -7.297080155631608 ], [ "man", -7.297357070474016 ], [ "▁pu", -7.301110944798582 ], [ "▁car", -7.307416744513651 ], [ "▁think", -7.315076410486418 ], [ "gra", -7.327973901718577 ], [ "▁even", -7.329667453772984 ], [ "▁now", -7.334324859615647 ], [ "▁want", -7.338644082597581 ], [ "▁bu", -7.340994934917251 ], [ "▁over", -7.357011041337032 ], [ "▁way", -7.358907442350617 ], [ "▁into", -7.361535232495497 ], [ "ction", -7.370624313735977 ], [ "▁res", -7.370897459814932 ], [ "tter", -7.372367561211387 ], [ "▁la", -7.373978735498747 ], [ "ful", -7.374168968662778 ], [ "▁because", -7.374226327340223 ], [ "▁nine", -7.377940224587505 ], [ "ell", -7.38168358084371 ], [ "he", -7.385884132318921 ], [ "▁li", -7.38608373851878 ], [ "▁could", -7.38798793644245 ], [ "ence", -7.40182339117924 ], [ "▁very", -7.408068976933197 ], [ "▁ar", -7.416235924919274 ], [ "▁us", -7.421022762491443 ], [ "▁them", -7.439007960826135 ], [ "ze", -7.442237617833348 ], [ "ally", -7.445673208127968 ], [ "und", -7.449077104869996 ], [ "▁look", -7.4659201053893955 ], [ "ving", -7.476701160870162 ], [ "▁use", -7.489550017591284 ], [ "▁need", -7.507395353897277 ], [ "▁most", -7.508463507233221 ], [ "ang", -7.512544632371611 ], [ "▁every", -7.517595810044174 ], [ "qui", -7.522443160037289 ], [ "▁any", -7.523800340825595 ], [ "▁bi", -7.526209973364409 ], [ "▁cu", -7.532985493152722 ], [ "ill", -7.542831181363763 ], [ "▁only", -7.542967005319637 ], [ "▁its", -7.548158954564299 ], [ "▁take", -7.549454035649402 ], [ "▁day", -7.55289323110352 ], [ "▁part", -7.5550550241872205 ], [ "▁back", -7.556383210659442 ], [ "▁three", -7.557901106933523 ], [ "▁going", -7.561348004910169 ], [ "ever", -7.562036821423799 ], [ "▁years", -7.562332353467333 ], [ "▁also", -7.563420504567635 ], [ "▁these", -7.563445901010155 ], [ "▁world", -7.565852728256363 ], [ "▁jo", -7.566271341802093 ], [ "for", -7.573909200353215 ], [ "ated", -7.574595698553555 ], [ "▁where", -7.582311490247022 ], [ "▁app", -7.583407657098217 ], [ "ble", -7.627885239687924 ], [ "▁five", -7.631767928016329 ], [ "▁many", -7.635697451452415 ], [ "▁rec", -7.637686462277019 ], [ "▁first", -7.644434380130841 ], [ "▁much", -7.644436171280629 ], [ "▁good", -7.657353544125229 ], [ "▁don't", -7.6768565491070415 ], [ "▁ga", -7.69217957641502 ], [ "▁six", -7.692693565978264 ], [ "q", -7.696493807735186 ], [ "rac", -7.700769840354173 ], [ "▁him", -7.708060244213307 ], [ "▁may", -7.710511654127954 ], [ "▁pri", -7.722091815046571 ], [ "▁come", -7.723292409553087 ], [ "▁those", -7.724939069549475 ], [ "▁play", -7.728249278606009 ], [ "ster", -7.729194989675051 ], [ "▁life", -7.73961540553249 ], [ "led", -7.741277900326601 ], [ "▁mu", -7.743922966825693 ], [ "ries", -7.744512973191547 ], [ "▁four", -7.755243064488337 ], [ "mer", -7.759376851450934 ], [ "lic", -7.759810124548851 ], [ "▁after", -7.771154353039114 ], [ "ress", -7.772231241897565 ], [ "▁eight", -7.77506526793125 ], [ "▁really", -7.782333664230924 ], [ "▁year", -7.788820100546483 ], [ "rate", -7.790966895572767 ], [ "▁well", -7.795038016798314 ], [ "▁rel", -7.8007633012099244 ], [ "ugh", -7.805080671564255 ], [ "▁long", -7.814635923692183 ], [ "▁through", -7.829704400466481 ], [ "▁seven", -7.836407871710433 ], [ "▁down", -7.836418492278401 ], [ "▁right", -7.858358391858481 ], [ "▁gu", -7.860403374216325 ], [ "▁should", -7.878607017374005 ], [ "▁show", -7.891284436630819 ], [ "cent", -7.898714301504924 ], [ "▁imp", -7.900788492216902 ], [ "low", -7.905991712097471 ], [ "port", -7.914066728567365 ], [ "line", -7.920416602299406 ], [ "▁twenty", -7.933102379942527 ], [ "▁inter", -7.933146540231563 ], [ "▁point", -7.947713293554532 ], [ "▁though", -7.950963442024106 ], [ "▁help", -7.953395760731333 ], [ "unk", -7.96405383178991 ], [ "land", -7.969005005504927 ], [ "late", -7.974607980796549 ], [ "▁high", -7.9792944119440445 ], [ "hol", -7.984142148598163 ], [ "▁something", -7.987930016312797 ], [ "▁start", -7.991521449712682 ], [ "▁great", -7.995695921767439 ], [ "▁did", -7.995769657368477 ], [ "▁own", -7.998997362615391 ], [ "▁still", -8.004415671326598 ], [ "▁give", -8.007682726112003 ], [ "▁change", -8.043547517434014 ], [ "▁live", -8.045269835331228 ], [ "▁mean", -8.051337748483254 ], [ "▁ten", -8.05686057363299 ], [ "ions", -8.056903676226455 ], [ "▁feel", -8.058066665774504 ], [ "dent", -8.066761170886643 ], [ "▁plan", -8.071036532069428 ], [ "▁around", -8.074900212064014 ], [ "▁again", -8.088898555817396 ], [ "ked", -8.090024710382108 ], [ "▁i'm", -8.092715477262878 ], [ "▁win", -8.100661165025917 ], [ "▁before", -8.104489788124134 ], [ "▁place", -8.10461968915699 ], [ "▁find", -8.11430104751808 ], [ "▁rep", -8.122315146334142 ], [ "▁old", -8.123425447706959 ], [ "que", -8.124806078291183 ], [ "▁home", -8.138270977097749 ], [ "▁same", -8.146767026460113 ], [ "▁made", -8.146986846010169 ], [ "ities", -8.150760621978181 ], [ "▁gene", -8.15327179232153 ], [ "▁little", -8.157188696200285 ], [ "▁never", -8.158766011413453 ], [ "▁add", -8.160851525577536 ], [ "▁dec", -8.162053869836686 ], [ "▁such", -8.166487774779092 ], [ "▁real", -8.170648079845975 ], [ "<", -8.174546931075481 ], [ ">", -8.174546931075481 ], [ "▁different", -8.177401003173868 ], [ "▁america", -8.195006004980678 ], [ "▁percent", -8.203669636244905 ], [ "▁happen", -8.217603938472323 ], [ "▁person", -8.220095360187253 ], [ "▁try", -8.221193651201798 ], [ "▁problem", -8.22740859722421 ], [ "▁war", -8.23095490125615 ], [ "▁hand", -8.254610862372147 ], [ "▁few", -8.255428618052214 ], [ "▁under", -8.259510957523517 ], [ "▁might", -8.259548828010722 ], [ "▁why", -8.266548799381445 ], [ "▁far", -8.27300403385747 ], [ "▁another", -8.275188116440127 ], [ "▁while", -8.277332697575046 ], [ "▁children", -8.27801874372605 ], [ "▁turn", -8.295468236043432 ], [ "▁hard", -8.319579651267665 ], [ "▁school", -8.324874007903876 ], [ "▁system", -8.33452886342535 ], [ "▁fact", -8.340618576727204 ], [ "ship", -8.356785263078963 ], [ "▁fun", -8.357450339001213 ], [ "▁found", -8.357664922206961 ], [ "▁talk", -8.360321901371401 ], [ "▁always", -8.362459653054763 ], [ "▁water", -8.36618184404266 ], [ "▁kind", -8.370441708712255 ], [ "▁power", -8.4073529834664 ], [ "serv", -8.417257194651876 ], [ "▁human", -8.422197729087952 ], [ "▁thirty", -8.424889240542297 ], [ "▁move", -8.425313917158075 ], [ "▁develop", -8.432217995201652 ], [ "▁country", -8.437154694153358 ], [ "bility", -8.442062543843859 ], [ "▁trans", -8.445491134571299 ], [ "▁keep", -8.44712153859064 ], [ "▁between", -8.45007471210999 ], [ "▁question", -8.451327047455063 ], [ "▁blo", -8.457137199160451 ], [ "▁small", -8.46448825322034 ], [ "▁both", -8.465391170838782 ], [ "▁money", -8.47148024861842 ], [ "▁important", -8.474535449814981 ], [ "▁women", -8.488463151090523 ], [ "▁next", -8.499226729264008 ], [ "▁fifty", -8.508940876979528 ], [ "ality", -8.518162563343079 ], [ "▁we're", -8.523634714033477 ], [ "▁friend", -8.52935941783535 ], [ "▁family", -8.53529333982452 ], [ "▁without", -8.537235506300185 ], [ "▁away", -8.538471008287006 ], [ "▁build", -8.538871941416144 ], [ "▁lead", -8.541724089953997 ], [ "▁today", -8.556512784276267 ], [ "▁number", -8.558202484196894 ], [ "▁large", -8.564258756492885 ], [ "▁health", -8.565300531106969 ], [ "▁learn", -8.567104799745975 ], [ "▁believe", -8.577380612888351 ], [ "▁face", -8.578121546300304 ], [ "ption", -8.585144346347146 ], [ "▁free", -8.59221300125728 ], [ "▁book", -8.599140662214904 ], [ "▁house", -8.602072174491203 ], [ "▁business", -8.603458120072418 ], [ "▁open", -8.624533589738135 ], [ "▁you're", -8.648211923200758 ], [ "▁didn't", -8.65073286945624 ], [ "▁research", -8.654318581492857 ], [ "▁government", -8.659900246962525 ], [ "▁enough", -8.661264202203286 ], [ "▁market", -8.667470844760476 ], [ "▁experience", -8.66898214592779 ], [ "▁course", -8.669777377978134 ], [ "▁second", -8.700729924214157 ], [ "▁create", -8.701429428455523 ], [ "▁together", -8.705533541005922 ], [ "▁product", -8.707952333543432 ], [ "▁month", -8.71266710271921 ], [ "▁understand", -8.71462695216538 ], [ "▁group", -8.719622335030817 ], [ "▁hope", -8.727612049816125 ], [ "▁word", -8.738163070498098 ], [ "▁actually", -8.739409587339788 ], [ "▁million", -8.741440102156783 ], [ "▁public", -8.74296676434594 ], [ "▁food", -8.752893623113767 ], [ "▁effect", -8.757232196017492 ], [ "▁design", -8.761882269915363 ], [ "▁level", -8.804900237478845 ], [ "▁reason", -8.815829965488467 ], [ "▁result", -8.816553476957235 ], [ "▁everything", -8.818899965733241 ], [ "▁direct", -8.83686357974808 ], [ "▁they're", -8.839261528715387 ], [ "▁story", -8.848157809410479 ], [ "▁watch", -8.85631769352631 ], [ "▁process", -8.864285937562881 ], [ "▁certain", -8.864810258454874 ], [ "▁moment", -8.874608010450412 ], [ "▁student", -8.891495076518082 ], [ "▁future", -8.90392038847965 ], [ "▁space", -8.907814016098662 ], [ "▁whether", -8.913050460769432 ], [ "▁anything", -8.915366793380107 ], [ "▁control", -8.919573217710807 ], [ "▁almost", -8.946550058174424 ], [ "▁support", -8.951967867236128 ], [ "▁walk", -8.955584246502461 ], [ "▁doesn't", -8.9638733650071 ], [ "▁information", -8.96888912861277 ], [ "▁social", -8.971342303562142 ], [ "▁follow", -8.97446815714169 ], [ "▁until", -8.990321616601864 ], [ "▁example", -9.001875521562729 ], [ "▁difficult", -9.0165306697857 ], [ "▁already", -9.0177231039658 ], [ "▁expect", -9.021784726096683 ], [ "▁energy", -9.02456104759289 ], [ "▁success", -9.028600208851309 ], [ "▁minute", -9.030795425316878 ], [ "▁europe", -9.04771952287184 ], [ "▁probably", -9.048211213265375 ], [ "▁project", -9.050811914901365 ], [ "▁sometimes", -9.053271521338395 ], [ "▁photo", -9.059860032471327 ], [ "▁patient", -9.075396006321796 ], [ "▁across", -9.081675876568864 ], [ "▁particular", -9.088228568291065 ], [ "▁possible", -9.095938491890518 ], [ "vision", -9.105540231265383 ], [ "▁technology", -9.15104370441145 ], [ "▁environment", -9.159697884475364 ], [ "▁political", -9.167264556603438 ], [ "▁themselves", -9.176977040696691 ], [ "position", -9.204917593191963 ], [ "▁strong", -9.20573374226319 ], [ "▁remember", -9.206030914810102 ], [ "▁character", -9.209911780520674 ], [ "▁picture", -9.223497104806158 ], [ "▁wonder", -9.23122476787115 ], [ "▁community", -9.24137557937244 ], [ "▁perhaps", -9.25359105858772 ], [ "▁economic", -9.254737082281686 ], [ "▁global", -9.257818271485329 ], [ "▁challenge", -9.25895160707356 ], [ "▁individual", -9.297649240927928 ], [ "▁suggest", -9.299664904893852 ], [ "▁natural", -9.306034554769443 ], [ "▁special", -9.34467213541556 ], [ "▁difference", -9.372803643965128 ], [ "▁especially", -9.410608286507568 ], [ "▁tradition", -9.461990845165564 ], [ "▁although", -9.471896386211812 ], [ "▁economy", -9.487149405320348 ], [ "▁potential", -9.555847106305505 ], [ "▁opportunity", -9.567421441451732 ], [ "▁university", -9.678153863522155 ], [ "▁significant", -9.941828751919743 ], [ "0", -13.077322941590936 ], [ "1", -13.452847421598854 ], [ "2", -13.65609192746778 ], [ "9", -14.171178770504884 ], [ "[", -14.347114963703778 ], [ "]", -14.378726769576597 ], [ "3", -14.454520550807782 ], [ "5", -14.675697303185672 ], [ "8", -14.70763610326042 ], [ "4", -15.03602685107692 ], [ "$", -15.03602685107692 ], [ "6", -15.187832394806913 ], [ "7", -15.187832394806913 ], [ "&", -15.635142021469544 ], [ "+", -17.411485454146515 ], [ "=", -17.612080692241783 ], [ "#", -17.8632461934077 ], [ "%", -18.34221444742827 ], [ "@", -18.958881114094932 ], [ "^", -19.792214447428268 ], [ "*", -20.792214447428268 ], [ "\\", -20.792214447428268 ] ] } }