{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": "", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "!": 1, "\"": 2, "#": 3, "$": 4, "%": 5, "&": 6, "'": 7, "(": 8, ")": 9, "*": 10, "+": 11, ",": 12, "-": 13, ".": 14, "/": 15, "0": 16, "1": 17, "2": 18, "3": 19, "4": 20, "5": 21, "6": 22, "7": 23, "8": 24, "9": 25, ":": 26, ";": 27, "<": 28, "=": 29, ">": 30, "?": 31, "@": 32, "A": 33, "B": 34, "C": 35, "D": 36, "E": 37, "F": 38, "G": 39, "H": 40, "I": 41, "J": 42, "K": 43, "L": 44, "M": 45, "N": 46, "O": 47, "P": 48, "Q": 49, "R": 50, "S": 51, "T": 52, "U": 53, "V": 54, "W": 55, "X": 56, "Y": 57, "Z": 58, "[": 59, "\\": 60, "]": 61, "^": 62, "_": 63, "`": 64, "a": 65, "b": 66, "c": 67, "d": 68, "e": 69, "f": 70, "g": 71, "h": 72, "i": 73, "j": 74, "k": 75, "l": 76, "m": 77, "n": 78, "o": 79, "p": 80, "q": 81, "r": 82, "s": 83, "t": 84, "u": 85, "v": 86, "w": 87, "x": 88, "y": 89, "z": 90, "{": 91, "|": 92, "}": 93, "~": 94, "Ċ": 95, "č": 96, "Ġ": 97, "\\\"": 98, "Ġ\\\"": 99, "\\\":": 100, "er": 101, "en": 102, "ti": 103, "on": 104, "\\\",": 105, "si": 106, "at": 107, "in": 108, "ro": 109, "tion": 110, "id": 111, "Ġ1": 112, "Ġ0": 113, "or": 114, "la": 115, "po": 116, "al": 117, "el": 118, "tr": 119, "ut": 120, "nu": 121, "od": 122, "se": 123, "pe": 124, "ch": 125, "ze": 126, "ty": 127, "ar": 128, "ad": 129, "size": 130, "dro": 131, "ed": 132, "ec": 133, "Ġ2": 134, "to": 135, "type": 136, "num": 137, "lay": 138, "att": 139, "layer": 140, "an": 141, "ca": 142, "den": 143, "pout": 144, "dropout": 145, "hid": 146, "hidden": 147, "ken": 148, "ention": 149, "attention": 150, "ing": 151, "token": 152, "ue": 153, "ma": 154, "Ġ{": 155, "Ġ3": 156, "re": 157, "em": 158, "Ġ5": 159, "true": 160, "Ġtrue": 161, "pro": 162, "orm": 163, "ac": 164, "12": 165, "emb": 166, "02": 167, "Ġf": 168, "alse": 169, "Ġfalse": 170, "prob": 171, "ex": 172, "oder": 173, "embed": 174, "ding": 175, "Ġ[": 176, "],": 177, "embedding": 178, "am": 179, "Ġ12": 180, "odel": 181, "00": 182, "fi": 183, "he": 184, "di": 185, "os": 186, "it": 187, "layers": 188, "model": 189, "vo": 190, "cab": 191, "vocab": 192, "head": 193, "Ġ{\\\"": 194, "ure": 195, "Ġ512": 196, "st": 197, "posi": 198, "ect": 199, "position": 200, "max": 201, "norm": 202, "sk": 203, "ll": 204, "EL": 205, "ert": 206, "lo": 207, "LA": 208, "},": 209, "BEL": 210, "LABEL": 211, "act": 212, "heads": 213, "ans": 214, "ext": 215, "las": 216, "iz": 217, "text": 218, "trans": 219, "ate": 220, "th": 221, "ver": 222, "ation": 223, "ge": 224, "lassi": 225, "sion": 226, "embeddings": 227, "ep": 228, "{\\\"": 229, "\":": 230, "\"}": 231, "\"{\\\"": 232, "}\"}": 233, "čĊ": 234, "Ġ\"{\\\"": 235, "Ġ{\"": 236, "dec": 237, "Ġ[\\\"": 238, "\\\"],": 239, "enc": 240, "decoder": 241, "lab": 242, "ter": 243, "ini": 244, "izer": 245, "label": 246, "elu": 247, "use": 248, "tiv": 249, "arch": 250, "form": 251, "ures": 252, "len": 253, "itect": 254, "architect": 255, "architectures": 256, "su": 257, "Ġ4": 258, "pat": 259, "transform": 260, "gth": 261, "ers": 262, "tial": 263, "lassifi": 264, "length": 265, "gelu": 266, "ame": 267, "version": 268, "mask": 269, "transformers": 270, "Ġ7": 271, "initial": 272, "bert": 273, "initializer": 274, "path": 275, "pad": 276, "null": 277, "Ġnull": 278, "con": 279, "fe": 280, "name": 281, "sum": 282, "Ġ6": 283, "ng": 284, "For": 285, "ran": 286, "Ġ\"{\\\"_": 287, "dim": 288, "range": 289, "le": 290, "68": 291, "flo": 292, "05": 293, "float": 294, "tor": 295, "32": 296, "dtype": 297, "torch": 298, "mar": 299, "Ġ768": 300, "ent": 301, "eos": 302, "encoder": 303, "feat": 304, "eps": 305, "che": 306, "edi": 307, "cache": 308, "inter": 309, "is": 310, "summar": 311, "fu": 312, "medi": 313, "ol": 314, "activ": 315, "activation": 316, "mediate": 317, "intermediate": 318, "classifi": 319, "me": 320, "07": 321, "bos": 322, "conv": 323, "drop": 324, "ev": 325, "proj": 326, "ear": 327, "072": 328, "summary": 329, "Ġ102": 330, "ab": 331, "be": 332, "Ġ1024": 333, "Ġ3072": 334, "Ġ16": 335, "Ġ32": 336, "cation": 337, "56": 338, "classifier": 339, "eck": 340, "nel": 341, "probs": 342, "ams": 343, "check": 344, "out": 345, "ting": 346, "put": 347, "\\\"},": 348, "sc": 349, "poin": 350, "checkpoin": 351, "as": 352, "Ġ128": 353, "ne": 354, "ff": 355, "ase": 356, "gr": 357, "full": 358, "ta": 359, "Ġ502": 360, "qu": 361, "output": 362, "ient": 363, "time": 364, "ig": 365, "art": 366, "up": 367, "ap": 368, "grad": 369, "gradient": 370, "Ġ8": 371, "un": 372, "checkpointing": 373, "per": 374, "sol": 375, "tive": 376, "lin": 377, "96": 378, "extr": 379, "min": 380, "scal": 381, "bi": 382, "16": 383, "berta": 384, "ForC": 385, "ute": 386, "absol": 387, "absolute": 388, "ck": 389, "LM": 390, "linear": 391, "ts": 392, "for": 393, "av": 394, "ction": 395, "igh": 396, "base": 397, "ector": 398, "do": 399, "ly": 400, "Ġ10": 401, "48": 402, "Ġ40": 403, "extract": 404, "Bert": 405, "ct": 406, "beams": 407, "we": 408, "gro": 409, "Classifi": 410, "scale": 411, "Classification": 412, "pt": 413, "layerdrop": 414, "group": 415, "sil": 416, "22": 417, "ence": 418, "weigh": 419, "pos": 420, "pre": 421, "attn": 422, "Ġ20": 423, "spe": 424, "cod": 425, "au": 426, "10": 427, "fin": 428, "ener": 429, "Ġ4096": 430, "pool": 431, "par": 432, "bias": 433, "chan": 434, "task": 435, "channel": 436, "evector": 437, "codevector": 438, "rate": 439, "BertFor": 440, "Ġ256": 441, "start": 442, "spec": 443, "top": 444, "pooler": 445, "ord": 446, "ali": 447, "pdrop": 448, "feature": 449, "quence": 450, "eneration": 451, "Model": 452, "0000": 453, "Ġ15": 454, "dev": 455, "embd": 456, "adap": 457, "adapter": 458, "add": 459, "ffn": 460, "Ġ24": 461, "Se": 462, "14": 463, "str": 464, "ker": 465, "Sequence": 466, "SequenceClassification": 467, "dition": 468, "Ġ64": 469, "pa": 470, "epsil": 471, "epsilon": 472, "Ġ2048": 473, "de": 474, "fun": 475, "til": 476, "tra": 477, "kernel": 478, "tie": 479, "oss": 480, "rope": 481, "function": 482, "ce": 483, "ping": 484, "fix": 485, "loss": 486, "roberta": 487, "no": 488, "its": 489, "stop": 490, "word": 491, "lation": 492, "early": 493, "stopping": 494, "}},": 495, "pen": 496, "000": 497, "positions": 498, "tic": 499, "ple": 500, "ditional": 501, "past": 502, "nn": 503, "fic": 504, "Ġ305": 505, "\\\\": 506, "alize": 507, "normalize": 508, "ment": 509, "prefix": 510, "lative": 511, "relative": 512, "Generation": 513, "lem": 514, "ide": 515, "groups": 516, "onditional": 517, "onditionalGeneration": 518, "ForConditionalGeneration": 519, "li": 520, "fact": 521, "speci": 522, "final": 523, "problem": 524, "06": 525, "alty": 526, "log": 527, "penalty": 528, "Ġ30522": 529, "factor": 530, "labels": 531, "classification": 532, "ish": 533, "sl": 534, "ctc": 535, "val": 536, "fir": 537, "ace": 538, "first": 539, "iding": 540, "sliding": 541, "new": 542, "Ro": 543, "sing": 544, "sam": 545, "params": 546, "et": 547, "zer": 548, "weight": 549, "bu": 550, "single": 551, "specific": 552, "sed": 553, "logits": 554, "Ġ50256": 555, "19": 556, "ls": 557, "cased": 558, "sta": 559, "td": 560, "ection": 561, "init": 562, "stride": 563, "dir": 564, "tdnn": 565, "ke": 566, "dis": 567, "so": 568, "gpt": 569, "57": 570, "wav": 571, "classi": 572, "ization": 573, "train": 574, "able": 575, "classif": 576, "sh": 577, "sample": 578, "To": 579, "512": 580, "vec": 581, "11": 582, "weights": 583, "ri": 584, "war": 585, "ER": 586, "BertForSequenceClassification": 587, "dat": 588, "static": 589, "dow": 590, "ind": 591, "Token": 592, "fig": 593, "sal": 594, "vi": 595, "Ġ514": 596, "summarization": 597, "oo": 598, "kv": 599, "distil": 600, "std": 601, "edu": 602, "index": 603, "Ġ50": 604, "Roberta": 605, "tem": 606, "masks": 607, "late": 608, "set": 609, "65": 610, "ga": 611, "buck": 612, "ced": 613, "key": 614, "atch": 615, "ets": 616, "ic": 617, "ian": 618, "15": 619, "forced": 620, "buckets": 621, "PT": 622, "ner": 623, "lish": 624, "value": 625, "18": 626, "ode": 627, "win": 628, "Ma": 629, "25": 630, "lar": 631, "Ġto": 632, "fc": 633, "distilbert": 634, "inf": 635, "ĠE": 636, "translation": 637, "poch": 638, "Ġ300": 639, "ward": 640, "36": 641, "ated": 642, "epoch": 643, "Ġ50257": 644, "ve": 645, "clas": 646, "OR": 647, "translate": 648, "ckpt": 649, "ss": 650, "finet": 651, "17": 652, "GPT": 653, "ature": 654, "sp": 655, "ĠEng": 656, "Mask": 657, "ausal": 658, "ausalLM": 659, "ForCausalLM": 660, "lock": 661, "ĠEnglish": 662, "Ġ\\\"},": 663, "resi": 664, "Ġ\\\"/": 665, "Masked": 666, "quan": 667, "finetun": 668, "sel": 669, "ram": 670, "mage": 671, "MaskedLM": 672, "repe": 673, "resid": 674, "co": 675, "24": 676, "window": 677, "large": 678, "inner": 679, "21": 680, "sw": 681, "lm": 682, "ur": 683, "perature": 684, "temperature": 685, "app": 686, "Ġ100": 687, "13": 688, "gment": 689, "config": 690, "Ġ10000": 691, "feed": 692, "RobertaFor": 693, "forward": 694, "all": 695, "uncased": 696, "the": 697, "relu": 698, "ids": 699, "AR": 700, "20": 701, "man": 702, "ON": 703, "Ġ25": 704, "apply": 705, "cls": 706, "fore": 707, "before": 708, "augment": 709, "Ġ14": 710, "Ġ50265": 711, "Ġ9": 712, "ngram": 713, "gative": 714, "quanti": 715, "repeat": 716, "Di": 717, "uto": 718, "class": 719, "ms": 720, "zero": 721, "bfloat": 722, "pu": 723, "He": 724, "tar": 725, "redu": 726, "negative": 727, "arn": 728, "reduction": 729, "ther": 730, "net": 731, "Ġ28": 732, "wen": 733, "Wav": 734, "Vec": 735, "Ġ11": 736, "contr": 737, "silu": 738, "inity": 739, "ai": 740, "other": 741, "infinity": 742, "mt": 743, "rms": 744, "batch": 745, "stable": 746, "image": 747, "selection": 748, "and": 749, "Head": 750, "patch": 751, "TC": 752, "Ġ1500": 753, "space": 754, "ft": 755, "finetuning": 756, "setting": 757, "learn": 758, "128": 759, "settings": 760, "tokenizer": 761, "theta": 762, "DP": 763, "face": 764, "fr": 765, "ook": 766, "mean": 767, "qa": 768, "LMHead": 769, "mu": 770, "ctx": 771, "DE": 772, "book": 773, "LMHeadModel": 774, "bart": 775, "sity": 776, "Ġ320": 777, "exp": 778, "OC": 779, "seq": 780, "learning": 781, "ra": 782, "weighted": 783, "gated": 784, "0002": 785, "Ġ\\\"_": 786, "lama": 787, "lt": 788, "TI": 789, "vision": 790, "channels": 791, "ForCTC": 792, "ub": 793, "84": 794, "PR": 795, "versity": 796, "block": 797, "LOC": 798, "Bad": 799, "diversity": 800, "BadDi": 801, "PER": 802, "facebook": 803, "epochs": 804, "ORG": 805, "steps": 806, "mp": 807, "Ġ1516": 808, "negatives": 809, "quantizer": 810, "codevectors": 811, "astive": 812, "contrastive": 813, "cal": 814, "generation": 815, "DDP": 816, "DDPM": 817, "TokenClassification": 818, "data": 819, "Dis": 820, "pr": 821, "datase": 822, "An": 823, "Tokenizer": 824, "idal": 825, "soidal": 826, "sinu": 827, "shar": 828, "sinusoidal": 829, "04": 830, "Ġ-": 831, "embds": 832, "_\\\":": 833, "mo": 834, "tral": 835, "Distil": 836, "es": 837, "CI": 838, "Ġ13": 839, "get": 840, "ues": 841, "checkpoint": 842, "Auto": 843, "Ġ\\\".": 844, "ED": 845, "ality": 846, "pretrain": 847, "26": 848, "BertForMaskedLM": 849, "uestion": 850, "by": 851, "SC": 852, "ance": 853, "FAR": 854, "ates": 855, "28": 856, "CIFAR": 857, "35": 858, "local": 859, "seed": 860, "models": 861, "over": 862, "fusion": 863, "mode": 864, "Question": 865, "dilation": 866, "ering": 867, "100": 868, "ison": 869, "UN": 870, "ectra": 871, "glo": 872, "sch": 873, "idi": 874, "Answ": 875, "Ġ248": 876, "Answering": 877, "QuestionAnswering": 878, "scaling": 879, "bal": 880, "sav": 881, "env": 882, "95": 883, "mb": 884, "ho": 885, "xlm": 886, "\\\"}\"}": 887, "global": 888, "mem": 889, "lp": 890, "sm": 891, "soft": 892, "ize": 893, "MT": 894, "23": 895, "Ġ30": 896, "bidi": 897, "Ġ32128": 898, "cu": 899, "Ġ200": 900, "vector": 901, "tokens": 902, "Po": 903, "save": 904, "input": 905, "Ġ[[": 906, "xvector": 907, "]],": 908, "order": 909, "dataset": 910, "qwen": 911, "hy": 912, "gle": 913, "cast": 914, "Ġ32000": 915, "wor": 916, "upcast": 917, "inver": 918, "direction": 919, "idx": 920, "res": 921, "directionality": 922, "/\\\",": 923, "inverse": 924, "reorder": 925, "Bart": 926, "target": 927, "AD": 928, "small": 929, "mul": 930, "Con": 931, "stance": 932, "his": 933, "AT": 934, "256": 935, "ameter": 936, "parameter": 937, "Ġ26": 938, "NL": 939, "xls": 940, "goo": 941, "share": 942, "words": 943, "lip": 944, "google": 945, "def": 946, "bad": 947, "mlp": 948, "27": 949, "99": 950, "resu": 951, "distance": 952, "bo": 953, "hisper": 954, "NLP": 955, "Ġ\\\"}},": 956, "pretraining": 957, "arian": 958, "Ġ\\\"./": 959, "Ġ[],": 960, "33": 961, "lr": 962, "work": 963, "Ġ{},": 964, "ice": 965, "por": 966, "eval": 967, "Ġ15164": 968, "ault": 969, "ODE": 970, "ack": 971, "ForSequenceClassification": 972, "BadDiff": 973, "op": 974, "ory": 975, "RobertaForSequenceClassification": 976, "ger": 977, "load": 978, "ki": 979, "parameters": 980, "fre": 981, "default": 982, "53": 983, "uct": 984, "llama": 985, "ren": 986, "ĠF": 987, "hyper": 988, "Rates": 989, "fn": 990, "Poison": 991, "BertForTokenClassification": 992, "Ġ56": 993, "ĠG": 994, "mic": 995, "tary": 996, "memory": 997, "rotary": 998, "Ġ18": 999, "expert": 1000, "dense": 1001, "ef": 1002, "gener": 1003, "marian": 1004, "erman": 1005, "MI": 1006, "54": 1007, "Marian": 1008, "SI": 1009, "996": 1010, "rel": 1011, "gg": 1012, "hub": 1013, "map": 1014, "MTModel": 1015, "MarianMTModel": 1016, "34": 1017, "one": 1018, "summarize": 1019, "schedu": 1020, "rive": 1021, "ual": 1022, "ail": 1023, "rench": 1024, "BartForConditionalGeneration": 1025, "ĠFrench": 1026, "sin": 1027, "bin": 1028, "01": 1029, "scr": 1030, "swish": 1031, "Ġ19": 1032, "Roman": 1033, "scratch": 1034, "29": 1035, "ine": 1036, "Ġ50000": 1037, "ST": 1038, "XLM": 1039, "go": 1040, "Ġ28996": 1041, "Gen": 1042, "Qwen": 1043, "ĠRoman": 1044, "ĠGerman": 1045, "ĠRomanian": 1046, "tp": 1047, "Ġ224": 1048, "pri": 1049, "types": 1050, "DistilBertForSequenceClassification": 1051, "64": 1052, "ite": 1053, "58": 1054, "cf": 1055, "mini": 1056, "Ġ60": 1057, "us": 1058, "Llama": 1059, "mi": 1060, "pus": 1061, "MISC": 1062, "spar": 1063, "fp": 1064, "vit": 1065, "schedule": 1066, "Block": 1067, "end": 1068, "Ġ384": 1069, "mm": 1070, "utral": 1071, "Config": 1072, "NE": 1073, "te": 1074, "CPR": 1075, "rain": 1076, "restart": 1077, "restarts": 1078, "electra": 1079, "wr": 1080, "micro": 1081, "opus": 1082, "ffusion": 1083, "Ġ1000": 1084, "beta": 1085, "mple": 1086, "sparse": 1087, "microsoft": 1088, "last": 1089, "BertModel": 1090, "down": 1091, "Hel": 1092, "Ġ21": 1093, "SM": 1094, "um": 1095, "sinki": 1096, "Helsinki": 1097, "entail": 1098, "entailment": 1099, "age": 1100, "device": 1101, "ay": 1102, "sup": 1103, "cnn": 1104, "experts": 1105, "xsum": 1106, "ip": 1107, "Ġ24805": 1108, "uc": 1109, "Ġ5120": 1110, "50": 1111, "write": 1112, "press": 1113, "Ġ62": 1114, "overwrite": 1115, "768": 1116, "Ġ36": 1117, "Ġ142": 1118, "moe": 1119, "VE": 1120, "Train": 1121, "psi": 1122, "BadDiffusion": 1123, "GenBadDiffusion": 1124, "BertForQuestionAnswering": 1125, "cfg": 1126, "OP": 1127, "AL": 1128, "Ġ448": 1129, "vp": 1130, "AutoModel": 1131, "ary": 1132, "001": 1133, "ES": 1134, "Ġ500": 1135, "Ġ1536": 1136, "Ġ250002": 1137, "infer": 1138, "TION": 1139, "force": 1140, "reg": 1141, "suppress": 1142, "30": 1143, "whisper": 1144, "cro": 1145, "neutral": 1146, "tmp": 1147, "meas": 1148, "mit": 1149, "dep": 1150, "condition": 1151, "measure": 1152, "cl": 1153, "network": 1154, "extra": 1155, "31": 1156, "144": 1157, "former": 1158, "scri": 1159, "nli": 1160, "multi": 1161, "bj": 1162, "RobertaForMaskedLM": 1163, "08": 1164, "ness": 1165, "avi": 1166, "sequence": 1167, "depth": 1168, "IN": 1169, "Ġ17": 1170, "units": 1171, "192": 1172, "ech": 1173, "lts": 1174, "stic": 1175, "conditioning": 1176, "simple": 1177, "tt": 1178, "results": 1179, "ph": 1180, "Ġ1000000": 1181, "freq": 1182, "mrope": 1183, "modeling": 1184, "Ġ48": 1185, "struct": 1186, "94": 1187, "termini": 1188, "determini": 1189, "LlamaForCausalLM": 1190, "deterministic": 1191, "cc": 1192, "vis": 1193, "Ġ\\\"\\\\": 1194, "frame": 1195, "hu": 1196, "rout": 1197, "home": 1198, "encode": 1199, "Ne": 1200, "py": 1201, "goal": 1202, "TIVE": 1203, "gu": 1204, "post": 1205, "hic": 1206, "om": 1207, "EN": 1208, "cle": 1209, "Ġ\\\\": 1210, "Ġ1280": 1211, "cont": 1212, "tc": 1213, "Ġ32768": 1214, "istral": 1215, "75": 1216, "wn": 1217, "Ġ23": 1218, "slo": 1219, "Ġ30000": 1220, "limit": 1221, "back": 1222, "Ġ8192": 1223, "finetuned": 1224, "Ġ80": 1225, "auto": 1226, "ight": 1227, "cau": 1228, "not": 1229, "CON": 1230, "PAR": 1231, "il": 1232, "202": 1233, "Do": 1234, "ast": 1235, "causal": 1236, "El": 1237, "sent": 1238, "mel": 1239, "sig": 1240, "In": 1241, "qkv": 1242, "bins": 1243, "Ġ27": 1244, "eng": 1245, "Ġ84": 1246, "300": 1247, "entable": 1248, "checkpoints": 1249, "opentable": 1250, "Electra": 1251, "lamb": 1252, "Image": 1253, "TR": 1254, "tur": 1255, "Ġ35": 1256, "mod": 1257, "lambd": 1258, "cel": 1259, "interval": 1260, "Ġ151643": 1261, "havi": 1262, "rati": 1263, "PO": 1264, "behavi": 1265, "66": 1266, "eci": 1267, "behavior": 1268, "224": 1269, "OF": 1270, "ong": 1271, "lang": 1272, "content": 1273, "tan": 1274, "causalm": 1275, "AN": 1276, "envs": 1277, "38": 1278, "384": 1279, "mtp": 1280, "}}\"}": 1281, "project": 1282, "clip": 1283, "uration": 1284, "uth": 1285, "quad": 1286, "positive": 1287, "NO": 1288, "De": 1289, "ses": 1290, "ForImage": 1291, "ical": 1292, "37": 1293, "ule": 1294, "BO": 1295, "unk": 1296, "xlsr": 1297, "pl": 1298, "peri": 1299, "sentence": 1300, "tanh": 1301, "Pro": 1302, "sus": 1303, "sour": 1304, "router": 1305, "ForImageClassification": 1306, "98": 1307, "BER": 1308, "albert": 1309, "sci": 1310, "ideo": 1311, "Ch": 1312, "fro": 1313, "Ġ65": 1314, "backb": 1315, "blocks": 1316, "BOX": 1317, "backbone": 1318, "mup": 1319, "Ġ119": 1320, "deberta": 1321, "port": 1322, "trig": 1323, "embert": 1324, "gasus": 1325, "source": 1326, "mix": 1327, "partial": 1328, "trigger": 1329, "Ġ22": 1330, "states": 1331, "Ġ262": 1332, "structure": 1333, "hi": 1334, "classes": 1335, "effic": 1336, "aux": 1337, "OD": 1338, "step": 1339, "efficient": 1340, "tin": 1341, "drive": 1342, "jo": 1343, "ingual": 1344, "GN": 1345, "fl": 1346, "fo": 1347, "are": 1348, "warmup": 1349, "Ġ262144": 1350, "pytorch": 1351, "90": 1352, "adi": 1353, "mamb": 1354, "mamba": 1355, "idth": 1356, "97": 1357, "BERT": 1358, "wer": 1359, "Ġ50258": 1360, "RobertaModel": 1361, "quantiz": 1362, "clean": 1363, "tri": 1364, "Down": 1365, "Ġ151645": 1366, "SDE": 1367, "gs": 1368, "run": 1369, "generate": 1370, "turn": 1371, "padding": 1372, "predi": 1373, "ME": 1374, "Pre": 1375, "sizes": 1376, ">\\\",": 1377, "width": 1378, "gin": 1379, "ore": 1380, "ATE": 1381, "quantization": 1382, "Vi": 1383, "CO": 1384, "My": 1385, "only": 1386, "outputs": 1387, "tilingual": 1388, "547": 1389, "UM": 1390, "of": 1391, "ron": 1392, "return": 1393, "shared": 1394, "Ġ31": 1395, "multilingual": 1396, "SIGN": 1397, "ForMaskedLM": 1398, "Drive": 1399, "PC": 1400, "sde": 1401, "Ġ58": 1402, "gam": 1403, "FI": 1404, "video": 1405, "ratio": 1406, "Ġand": 1407, "adapters": 1408, "AI": 1409, "UR": 1410, "Ġ131": 1411, "77": 1412, "Net": 1413, "spa": 1414, "lap": 1415, "UNC": 1416, "CONJ": 1417, "result": 1418, "open": 1419, "inal": 1420, "squad": 1421, "Ġ<": 1422, "ator": 1423, "datasets": 1424, "HE": 1425, "bits": 1426, "eb": 1427, "Ġ131072": 1428, "tiny": 1429, "dio": 1430, "special": 1431, "com": 1432, "repo": 1433, "Training": 1434, "PART": 1435, "Ġ119547": 1436, "spatial": 1437, "__": 1438, "read": 1439, "auth": 1440, "begin": 1441, "ecision": 1442, "MyDrive": 1443, "mer": 1444, "AutoConfig": 1445, "STOP": 1446, "Ġ503": 1447, "agg": 1448, "sual": 1449, "keep": 1450, "precision": 1451, "mixed": 1452, "del": 1453, "ery": 1454, "poison": 1455, "acc": 1456, "gpu": 1457, "generated": 1458, "Up": 1459, "Ġ21128": 1460, "YM": 1461, "ak": 1462, "005": 1463, "self": 1464, "RobertaForTokenClassification": 1465, "ux": 1466, "fil": 1467, "priv": 1468, "cell": 1469, "unslo": 1470, "from": 1471, "private": 1472, "unsloth": 1473, "46": 1474, "42": 1475, "push": 1476, "VP": 1477, "accu": 1478, "der": 1479, "attentions": 1480, "336": 1481, "overlap": 1482, "car": 1483, "ul": 1484, "cross": 1485, "accumu": 1486, "table": 1487, "vl": 1488, "stage": 1489, "Al": 1490, "NEG": 1491, "40": 1492, "NUM": 1493, "gen": 1494, "ith": 1495, "accumulation": 1496, "hor": 1497, "solver": 1498, "has": 1499, "UNI": 1500, "sign": 1501, "gamma": 1502, "eze": 1503, "sad": 1504, "uned": 1505, "postfix": 1506, "FOR": 1507, "ball": 1508, "visual": 1509, "fclip": 1510, "Ġ24804": 1511, "regation": 1512, "aggregation": 1513, "im": 1514, "Ġ96": 1515, "lan": 1516, "sched": 1517, "bott": 1518, "int": 1519, "section": 1520, "main": 1521, "adiction": 1522, "obj": 1523, "Ġ\\\"<": 1524, "emical": 1525, "Ġ518": 1526, "module": 1527, "ci": 1528, "ot": 1529, "Ġ16384": 1530, "220": 1531, "ength": 1532, "layernorm": 1533, "./": 1534, "Whisper": 1535, "erson": 1536, "leav": 1537, "grap": 1538, "contradiction": 1539, "orig": 1540, "life": 1541, "Ġ5026": 1542, "WhisperForConditionalGeneration": 1543, "AS": 1544, "Mo": 1545, "Re": 1546, "strength": 1547, "ru": 1548, "Ġ57": 1549, "&_": 1550, "_&_": 1551, "ational": 1552, "leneck": 1553, "residual": 1554, "script": 1555, "ViT": 1556, "COM": 1557, "ae": 1558, "vel": 1559, "Ġ1519": 1560, "deep": 1561, "bottleneck": 1562, "thod": 1563, "medium": 1564, "viron": 1565, "configuration": 1566, "pruned": 1567, "softmax": 1568, "HED": 1569, "MED": 1570, "af": 1571, "play": 1572, "quant": 1573, "SCHED": 1574, "DownBlock": 1575, "modules": 1576, "Py": 1577, "sic": 1578, "Ġ29": 1579, "vironment": 1580, "bit": 1581, "ud": 1582, "neo": 1583, "duct": 1584, "ko": 1585, "ort": 1586, "sha": 1587, "XED": 1588, "sampl": 1589, "Ġ14336": 1590, "inference": 1591, "Albert": 1592, "tok": 1593, "torchscript": 1594, "FIXED": 1595, "Mistral": 1596, "tition": 1597, "VER": 1598, "mis": 1599, "anger": 1600, "tal": 1601, "fear": 1602, "\\\\\\\\": 1603, "als": 1604, "ased": 1605, "Ġ250": 1606, "period": 1607, "food": 1608, "gan": 1609, "En": 1610, "Hu": 1611, "gate": 1612, "008": 1613, "pretrained": 1614, "ign": 1615, "repetition": 1616, "Ġ500000": 1617, "UNIPC": 1618, "sadness": 1619, "Ġ151936": 1620, "44": 1621, "revision": 1622, "audio": 1623, "AU": 1624, "col": 1625, "interleav": 1626, "biased": 1627, "sy": 1628, "vers": 1629, "now": 1630, "55": 1631, "long": 1632, "Ġthe": 1633, "original": 1634, "args": 1635, "2000": 1636, "enti": 1637, "tom": 1638, "level": 1639, "Ġ2501": 1640, "ODEL": 1641, "PreTraining": 1642, "ser": 1643, "ForQuestionAnswering": 1644, "Tar": 1645, "way": 1646, "lic": 1647, "ViTForImageClassification": 1648, "MODEL": 1649, "mnli": 1650, "mpnet": 1651, "crop": 1652, "POSI": 1653, "Ġ51865": 1654, "cam": 1655, "emo": 1656, "ffer": 1657, "joy": 1658, "filter": 1659, "52": 1660, "training": 1661, "poral": 1662, "NEGA": 1663, "person": 1664, "method": 1665, "shi": 1666, "convers": 1667, "152": 1668, "Target": 1669, "ie": 1670, "ard": 1671, "code": 1672, "RobertaForQuestionAnswering": 1673, "resnet": 1674, "freeze": 1675, "Snow": 1676, "Tr": 1677, "ili": 1678, "pegasus": 1679, "sur": 1680, "bug": 1681, "temporal": 1682, "hics": 1683, "ballTarget": 1684, "POSITIVE": 1685, "SnowballTarget": 1686, "ity": 1687, "mbart": 1688, "ant": 1689, "izon": 1690, "merge": 1691, "horizon": 1692, "Ġ43": 1693, "ramids": 1694, "Pyramids": 1695, "Ġ250112": 1696, "NEGATIVE": 1697, "environment": 1698, "best": 1699, "camembert": 1700, "xl": 1701, "verage": 1702, "quality": 1703, "debug": 1704, "graphics": 1705, "conversational": 1706, "43": 1707, "59": 1708, "gap": 1709, "peech": 1710, "bio": 1711, "ADJ": 1712, "hicle": 1713, "sume": 1714, "qui": 1715, "ssm": 1716, "resume": 1717, "trainer": 1718, "ĠS": 1719, "UpBlock": 1720, "ora": 1721, "lambda": 1722, "ise": 1723, "vehicle": 1724, "surpri": 1725, "reward": 1726, "320": 1727, "buffer": 1728, "ENT": 1729, "uction": 1730, "alo": 1731, "initialize": 1732, "ForTokenClassification": 1733, "langu": 1734, "gem": 1735, "answer": 1736, "speech": 1737, "fixed": 1738, "interleaved": 1739, "language": 1740, "hf": 1741, "oning": 1742, "height": 1743, "Ġ248044": 1744, "79": 1745, "80": 1746, "qk": 1747, "tern": 1748, "query": 1749, "Ġ2560": 1750, "cus": 1751, "mistral": 1752, "pla": 1753, "thread": 1754, "Ġ5036": 1755, "accep": 1756, "average": 1757, "Ġ33": 1758, "location": 1759, "AutoModelForCausalLM": 1760, "engine": 1761, "surprise": 1762, "minator": 1763, "ADV": 1764, "Ġ192": 1765, "AUX": 1766, "\\\"}}\"}": 1767, "pture": 1768, "hyperparameters": 1769, "behavioral": 1770, "QU": 1771, "lue": 1772, "insic": 1773, "extrinsic": 1774, "span": 1775, "Ġ\\\"../": 1776, "Ġ5005": 1777, "behaviors": 1778, "signals": 1779, "70": 1780, "gi": 1781, "capture": 1782, "Ġ310": 1783, "cloning": 1784, "Deberta": 1785, "areas": 1786, "lifetime": 1787, "threaded": 1788, "72": 1789, "ecut": 1790, "execut": 1791, "comm": 1792, "NOUN": 1793, "Bi": 1794, "HI": 1795, "ables": 1796, "phm": 1797, "Mini": 1798, "Ġ20000": 1799, "discri": 1800, "temp": 1801, "discriminator": 1802, "DistilBertForMaskedLM": 1803, "openai": 1804, "shape": 1805, "acceptable": 1806, "llm": 1807, "constr": 1808, "BertTokenizer": 1809, "executables": 1810, "Ġof": 1811, "additional": 1812, "delta": 1813, "custom": 1814, "cos": 1815, "indexes": 1816, "Ġ11008": 1817, "60": 1818, "88": 1819, "Ġ\\\",": 1820, "Ġ1152": 1821, "bile": 1822, "cap": 1823, "low": 1824, "DET": 1825, "sequences": 1826, "topk": 1827, "/.": 1828, "OT": 1829, "pct": 1830, "Ġ10000000": 1831, "flat": 1832, "IS": 1833, "ln": 1834, "0003": 1835, "39": 1836, "cor": 1837, "sers": 1838, "gging": 1839, "tu": 1840, "Ġ5035": 1841, "opti": 1842, "predict": 1843, "construction": 1844, "lora": 1845, "mr": 1846, "qg": 1847, "tas": 1848, "PRON": 1849, "PROP": 1850, "EX": 1851, "off": 1852, "sease": 1853, "Ġ128000": 1854, "103": 1855, "bot": 1856, "equi": 1857, "sian": 1858, "valent": 1859, "VERB": 1860, "equivalent": 1861, "09": 1862, "coder": 1863, "sep": 1864, "RO": 1865, "stack": 1866, "subj": 1867, "US": 1868, "Ġ595": 1869, "cola": 1870, "ADP": 1871, "big": 1872, "lu": 1873, "ForPreTraining": 1874, "sampling": 1875, "PROPN": 1876, "Co": 1877, "ba": 1878, "cated": 1879, "rd": 1880, "trained": 1881, "AutoTrain": 1882, "template": 1883, "51": 1884, "SYM": 1885, "ni": 1886, "features": 1887, "XLMRobertaForTokenClassification": 1888, "PUNC": 1889, "sa": 1890, "product": 1891, "binary": 1892, "Chemical": 1893, "MistralForCausalLM": 1894, "chunk": 1895, "loom": 1896, "vice": 1897, "Ġ2304": 1898, "Att": 1899, "CCONJ": 1900, "mon": 1901, "dent": 1902, "linux": 1903, "ignore": 1904, "mlm": 1905, "sv": 1906, "101": 1907, "ebd": 1908, "NC": 1909, "bm": 1910, "}}},": 1911, "288": 1912, "hugging": 1913, "PUNCT": 1914, "74": 1915, "EV": 1916, "ix": 1917, "ppo": 1918, "Ġ34": 1919, "coefficient": 1920, "cen": 1921, "ou": 1922, "sst": 1923, "und": 1924, "0001": 1925, "depths": 1926, "INT": 1927, "non": 1928, "FE": 1929, "imp": 1930, "ous": 1931, "stem": 1932, "skip": 1933, "ATUR": 1934, "Attn": 1935, "So": 1936, "Ġ59": 1937, "Fast": 1938, "alth": 1939, "XLMRobertaForSequenceClassification": 1940, "umn": 1941, "HEAD": 1942, "column": 1943, "DATE": 1944, "Mul": 1945, "mail": 1946, "vox": 1947, "love": 1948, "coef": 1949, "deepstack": 1950, "br": 1951, "du": 1952, "Ġst": 1953, "Ġ{}},": 1954, "news": 1955, "comp": 1956, "Ġ248320": 1957, "ggy": 1958, "Cam": 1959, "fast": 1960, "english": 1961, "spanish": 1962, "49": 1963, "ATION": 1964, "layout": 1965, "dedi": 1966, "ust": 1967, "stu": 1968, "ORA": 1969, "auxili": 1970, "Multi": 1971, "dedicated": 1972, "auxiliary": 1973, "YP": 1974, "tec": 1975, "normal": 1976, "Neo": 1977, "ElectraForSequenceClassification": 1978, "FEATUR": 1979, "FT": 1980, "Ġ39": 1981, "Ġ46": 1982, "repla": 1983, "berts": 1984, "GE": 1985, "\\\"]": 1986, "Huggy": 1987, "Ġ4304": 1988, "--": 1989, "824": 1990, "HAT": 1991, "ber": 1992, "false": 1993, "Ġ\\\"[": 1994, "total": 1995, "Name": 1996, "ane": 1997, "NEU": 1998, "FED": 1999, "ws": 2000, "tron": 2001, "ement": 2002, "ression": 2003, "FEDORA": 2004, "73": 2005, "Ġ12288": 2006, "sts": 2007, "Encoder": 2008, "DebertaV": 2009, "45": 2010, "78": 2011, "dict": 2012, "Ġ50264": 2013, "GPTNeo": 2014, "gust": 2015, "student": 2016, "IC": 2017, "MP": 2018, "bs": 2019, "Ġre": 2020, "valid": 2021, "layoutlm": 2022, "Or": 2023, "cy": 2024, "nl": 2025, "xi": 2026, "search": 2027, "action": 2028, "lender": 2029, "architecture": 2030, "appro": 2031, "noise": 2032, "center": 2033, "AC": 2034, "Pe": 2035, "projector": 2036, "longformer": 2037, "76": 2038, "chine": 2039, "ang": 2040, "fusions": 2041, "INTJ": 2042, "lenderbot": 2043, "VL": 2044, "fer": 2045, "075": 2046, "line": 2047 }, "merges": [ [ "\\", "\"" ], [ "Ġ", "\\\"" ], [ "\\\"", ":" ], [ "e", "r" ], [ "e", "n" ], [ "t", "i" ], [ "o", "n" ], [ "\\\"", "," ], [ "s", "i" ], [ "a", "t" ], [ "i", "n" ], [ "r", "o" ], [ "ti", "on" ], [ "i", "d" ], [ "Ġ", "1" ], [ "Ġ", "0" ], [ "o", "r" ], [ "l", "a" ], [ "p", "o" ], [ "a", "l" ], [ "e", "l" ], [ "t", "r" ], [ "u", "t" ], [ "n", "u" ], [ "o", "d" ], [ "s", "e" ], [ "p", "e" ], [ "c", "h" ], [ "z", "e" ], [ "t", "y" ], [ "a", "r" ], [ "a", "d" ], [ "si", "ze" ], [ "d", "ro" ], [ "e", "d" ], [ "e", "c" ], [ "Ġ", "2" ], [ "t", "o" ], [ "ty", "pe" ], [ "nu", "m" ], [ "la", "y" ], [ "at", "t" ], [ "lay", "er" ], [ "a", "n" ], [ "c", "a" ], [ "d", "en" ], [ "po", "ut" ], [ "dro", "pout" ], [ "h", "id" ], [ "hid", "den" ], [ "k", "en" ], [ "en", "tion" ], [ "att", "ention" ], [ "in", "g" ], [ "to", "ken" ], [ "u", "e" ], [ "m", "a" ], [ "Ġ", "{" ], [ "Ġ", "3" ], [ "r", "e" ], [ "e", "m" ], [ "Ġ", "5" ], [ "tr", "ue" ], [ "Ġ", "true" ], [ "p", "ro" ], [ "or", "m" ], [ "a", "c" ], [ "1", "2" ], [ "em", "b" ], [ "0", "2" ], [ "Ġ", "f" ], [ "al", "se" ], [ "Ġf", "alse" ], [ "pro", "b" ], [ "e", "x" ], [ "od", "er" ], [ "emb", "ed" ], [ "d", "ing" ], [ "Ġ", "[" ], [ "]", "," ], [ "embed", "ding" ], [ "a", "m" ], [ "Ġ1", "2" ], [ "od", "el" ], [ "0", "0" ], [ "f", "i" ], [ "h", "e" ], [ "d", "i" ], [ "o", "s" ], [ "i", "t" ], [ "layer", "s" ], [ "m", "odel" ], [ "v", "o" ], [ "ca", "b" ], [ "vo", "cab" ], [ "he", "ad" ], [ "Ġ{", "\\\"" ], [ "u", "re" ], [ "Ġ5", "12" ], [ "s", "t" ], [ "po", "si" ], [ "ec", "t" ], [ "posi", "tion" ], [ "ma", "x" ], [ "n", "orm" ], [ "s", "k" ], [ "l", "l" ], [ "E", "L" ], [ "er", "t" ], [ "l", "o" ], [ "L", "A" ], [ "}", "," ], [ "B", "EL" ], [ "LA", "BEL" ], [ "ac", "t" ], [ "head", "s" ], [ "an", "s" ], [ "ex", "t" ], [ "la", "s" ], [ "i", "z" ], [ "t", "ext" ], [ "tr", "ans" ], [ "at", "e" ], [ "t", "h" ], [ "v", "er" ], [ "a", "tion" ], [ "g", "e" ], [ "las", "si" ], [ "si", "on" ], [ "embedding", "s" ], [ "e", "p" ], [ "{", "\\\"" ], [ "\"", ":" ], [ "\"", "}" ], [ "\"", "{\\\"" ], [ "}", "\"}" ], [ "č", "Ċ" ], [ "Ġ", "\"{\\\"" ], [ "Ġ{", "\"" ], [ "d", "ec" ], [ "Ġ[", "\\\"" ], [ "\\\"", "]," ], [ "en", "c" ], [ "dec", "oder" ], [ "la", "b" ], [ "t", "er" ], [ "in", "i" ], [ "iz", "er" ], [ "lab", "el" ], [ "el", "u" ], [ "u", "se" ], [ "ti", "v" ], [ "ar", "ch" ], [ "f", "orm" ], [ "ure", "s" ], [ "l", "en" ], [ "it", "ect" ], [ "arch", "itect" ], [ "architect", "ures" ], [ "s", "u" ], [ "Ġ", "4" ], [ "p", "at" ], [ "trans", "form" ], [ "g", "th" ], [ "er", "s" ], [ "ti", "al" ], [ "lassi", "fi" ], [ "len", "gth" ], [ "g", "elu" ], [ "am", "e" ], [ "ver", "sion" ], [ "ma", "sk" ], [ "transform", "ers" ], [ "Ġ", "7" ], [ "ini", "tial" ], [ "b", "ert" ], [ "initial", "izer" ], [ "pat", "h" ], [ "p", "ad" ], [ "nu", "ll" ], [ "Ġ", "null" ], [ "c", "on" ], [ "f", "e" ], [ "n", "ame" ], [ "su", "m" ], [ "Ġ", "6" ], [ "n", "g" ], [ "F", "or" ], [ "r", "an" ], [ "Ġ\"{\\\"", "_" ], [ "di", "m" ], [ "ran", "ge" ], [ "l", "e" ], [ "6", "8" ], [ "f", "lo" ], [ "0", "5" ], [ "flo", "at" ], [ "t", "or" ], [ "3", "2" ], [ "d", "type" ], [ "tor", "ch" ], [ "m", "ar" ], [ "Ġ7", "68" ], [ "en", "t" ], [ "e", "os" ], [ "enc", "oder" ], [ "fe", "at" ], [ "ep", "s" ], [ "ch", "e" ], [ "ed", "i" ], [ "ca", "che" ], [ "in", "ter" ], [ "i", "s" ], [ "sum", "mar" ], [ "f", "u" ], [ "m", "edi" ], [ "o", "l" ], [ "ac", "tiv" ], [ "activ", "ation" ], [ "medi", "ate" ], [ "inter", "mediate" ], [ "c", "lassifi" ], [ "m", "e" ], [ "0", "7" ], [ "b", "os" ], [ "con", "v" ], [ "dro", "p" ], [ "e", "v" ], [ "pro", "j" ], [ "e", "ar" ], [ "07", "2" ], [ "summar", "y" ], [ "Ġ1", "02" ], [ "a", "b" ], [ "b", "e" ], [ "Ġ102", "4" ], [ "Ġ3", "072" ], [ "Ġ1", "6" ], [ "Ġ3", "2" ], [ "ca", "tion" ], [ "5", "6" ], [ "classifi", "er" ], [ "ec", "k" ], [ "n", "el" ], [ "prob", "s" ], [ "am", "s" ], [ "ch", "eck" ], [ "o", "ut" ], [ "ti", "ng" ], [ "p", "ut" ], [ "\\\"", "}," ], [ "s", "c" ], [ "po", "in" ], [ "check", "poin" ], [ "a", "s" ], [ "Ġ12", "8" ], [ "n", "e" ], [ "f", "f" ], [ "a", "se" ], [ "g", "r" ], [ "fu", "ll" ], [ "t", "a" ], [ "Ġ5", "02" ], [ "q", "u" ], [ "out", "put" ], [ "i", "ent" ], [ "ti", "me" ], [ "i", "g" ], [ "ar", "t" ], [ "u", "p" ], [ "a", "p" ], [ "gr", "ad" ], [ "grad", "ient" ], [ "Ġ", "8" ], [ "u", "n" ], [ "checkpoin", "ting" ], [ "p", "er" ], [ "s", "ol" ], [ "tiv", "e" ], [ "l", "in" ], [ "9", "6" ], [ "ex", "tr" ], [ "m", "in" ], [ "sc", "al" ], [ "b", "i" ], [ "1", "6" ], [ "bert", "a" ], [ "For", "C" ], [ "ut", "e" ], [ "ab", "sol" ], [ "absol", "ute" ], [ "c", "k" ], [ "L", "M" ], [ "lin", "ear" ], [ "t", "s" ], [ "f", "or" ], [ "a", "v" ], [ "c", "tion" ], [ "ig", "h" ], [ "b", "ase" ], [ "ect", "or" ], [ "d", "o" ], [ "l", "y" ], [ "Ġ1", "0" ], [ "4", "8" ], [ "Ġ4", "0" ], [ "extr", "act" ], [ "B", "ert" ], [ "c", "t" ], [ "be", "ams" ], [ "w", "e" ], [ "g", "ro" ], [ "C", "lassifi" ], [ "scal", "e" ], [ "Classifi", "cation" ], [ "p", "t" ], [ "layer", "drop" ], [ "gro", "up" ], [ "si", "l" ], [ "2", "2" ], [ "enc", "e" ], [ "we", "igh" ], [ "po", "s" ], [ "p", "re" ], [ "att", "n" ], [ "Ġ2", "0" ], [ "s", "pe" ], [ "c", "od" ], [ "a", "u" ], [ "1", "0" ], [ "f", "in" ], [ "en", "er" ], [ "Ġ40", "96" ], [ "po", "ol" ], [ "p", "ar" ], [ "bi", "as" ], [ "ch", "an" ], [ "ta", "sk" ], [ "chan", "nel" ], [ "ev", "ector" ], [ "cod", "evector" ], [ "r", "ate" ], [ "Bert", "For" ], [ "Ġ2", "56" ], [ "st", "art" ], [ "spe", "c" ], [ "to", "p" ], [ "pool", "er" ], [ "or", "d" ], [ "al", "i" ], [ "p", "drop" ], [ "feat", "ure" ], [ "qu", "ence" ], [ "ener", "ation" ], [ "M", "odel" ], [ "00", "00" ], [ "Ġ1", "5" ], [ "d", "ev" ], [ "emb", "d" ], [ "ad", "ap" ], [ "adap", "ter" ], [ "ad", "d" ], [ "ff", "n" ], [ "Ġ2", "4" ], [ "S", "e" ], [ "1", "4" ], [ "s", "tr" ], [ "k", "er" ], [ "Se", "quence" ], [ "Sequence", "Classification" ], [ "di", "tion" ], [ "Ġ6", "4" ], [ "p", "a" ], [ "ep", "sil" ], [ "epsil", "on" ], [ "Ġ20", "48" ], [ "d", "e" ], [ "fu", "n" ], [ "ti", "l" ], [ "tr", "a" ], [ "ker", "nel" ], [ "ti", "e" ], [ "os", "s" ], [ "ro", "pe" ], [ "fun", "ction" ], [ "c", "e" ], [ "p", "ing" ], [ "fi", "x" ], [ "l", "oss" ], [ "ro", "berta" ], [ "n", "o" ], [ "it", "s" ], [ "s", "top" ], [ "w", "ord" ], [ "la", "tion" ], [ "ear", "ly" ], [ "stop", "ping" ], [ "}", "}," ], [ "p", "en" ], [ "00", "0" ], [ "position", "s" ], [ "ti", "c" ], [ "p", "le" ], [ "dition", "al" ], [ "pa", "st" ], [ "n", "n" ], [ "fi", "c" ], [ "Ġ3", "05" ], [ "\\", "\\" ], [ "ali", "ze" ], [ "norm", "alize" ], [ "m", "ent" ], [ "pre", "fix" ], [ "la", "tive" ], [ "re", "lative" ], [ "G", "eneration" ], [ "l", "em" ], [ "id", "e" ], [ "group", "s" ], [ "on", "ditional" ], [ "onditional", "Generation" ], [ "ForC", "onditionalGeneration" ], [ "l", "i" ], [ "f", "act" ], [ "spec", "i" ], [ "fin", "al" ], [ "prob", "lem" ], [ "0", "6" ], [ "al", "ty" ], [ "lo", "g" ], [ "pen", "alty" ], [ "Ġ305", "22" ], [ "fact", "or" ], [ "label", "s" ], [ "classifi", "cation" ], [ "is", "h" ], [ "s", "l" ], [ "ct", "c" ], [ "v", "al" ], [ "fi", "r" ], [ "ac", "e" ], [ "fir", "st" ], [ "id", "ing" ], [ "sl", "iding" ], [ "ne", "w" ], [ "R", "o" ], [ "si", "ng" ], [ "s", "am" ], [ "par", "ams" ], [ "e", "t" ], [ "z", "er" ], [ "weigh", "t" ], [ "b", "u" ], [ "sing", "le" ], [ "speci", "fic" ], [ "se", "d" ], [ "log", "its" ], [ "Ġ502", "56" ], [ "1", "9" ], [ "l", "s" ], [ "ca", "sed" ], [ "st", "a" ], [ "t", "d" ], [ "ec", "tion" ], [ "in", "it" ], [ "str", "ide" ], [ "di", "r" ], [ "td", "nn" ], [ "k", "e" ], [ "di", "s" ], [ "s", "o" ], [ "g", "pt" ], [ "5", "7" ], [ "w", "av" ], [ "c", "lassi" ], [ "iz", "ation" ], [ "tra", "in" ], [ "ab", "le" ], [ "classi", "f" ], [ "s", "h" ], [ "sam", "ple" ], [ "T", "o" ], [ "5", "12" ], [ "v", "ec" ], [ "1", "1" ], [ "weigh", "ts" ], [ "r", "i" ], [ "w", "ar" ], [ "E", "R" ], [ "BertFor", "SequenceClassification" ], [ "d", "at" ], [ "sta", "tic" ], [ "do", "w" ], [ "in", "d" ], [ "To", "ken" ], [ "fi", "g" ], [ "s", "al" ], [ "v", "i" ], [ "Ġ5", "14" ], [ "summar", "ization" ], [ "o", "o" ], [ "k", "v" ], [ "dis", "til" ], [ "st", "d" ], [ "ed", "u" ], [ "ind", "ex" ], [ "Ġ5", "0" ], [ "Ro", "berta" ], [ "t", "em" ], [ "mask", "s" ], [ "l", "ate" ], [ "se", "t" ], [ "6", "5" ], [ "g", "a" ], [ "bu", "ck" ], [ "c", "ed" ], [ "ke", "y" ], [ "at", "ch" ], [ "e", "ts" ], [ "i", "c" ], [ "i", "an" ], [ "1", "5" ], [ "for", "ced" ], [ "buck", "ets" ], [ "P", "T" ], [ "n", "er" ], [ "l", "ish" ], [ "val", "ue" ], [ "1", "8" ], [ "od", "e" ], [ "w", "in" ], [ "M", "a" ], [ "2", "5" ], [ "la", "r" ], [ "Ġ", "to" ], [ "f", "c" ], [ "distil", "bert" ], [ "in", "f" ], [ "Ġ", "E" ], [ "trans", "lation" ], [ "po", "ch" ], [ "Ġ3", "00" ], [ "war", "d" ], [ "3", "6" ], [ "at", "ed" ], [ "e", "poch" ], [ "Ġ502", "57" ], [ "v", "e" ], [ "c", "las" ], [ "O", "R" ], [ "trans", "late" ], [ "ck", "pt" ], [ "s", "s" ], [ "fin", "et" ], [ "1", "7" ], [ "G", "PT" ], [ "at", "ure" ], [ "s", "p" ], [ "ĠE", "ng" ], [ "Ma", "sk" ], [ "au", "sal" ], [ "ausal", "LM" ], [ "ForC", "ausalLM" ], [ "lo", "ck" ], [ "ĠEng", "lish" ], [ "Ġ\\\"", "}," ], [ "re", "si" ], [ "Ġ\\\"", "/" ], [ "Mask", "ed" ], [ "qu", "an" ], [ "finet", "un" ], [ "s", "el" ], [ "r", "am" ], [ "ma", "ge" ], [ "Masked", "LM" ], [ "re", "pe" ], [ "resi", "d" ], [ "c", "o" ], [ "2", "4" ], [ "win", "dow" ], [ "lar", "ge" ], [ "in", "ner" ], [ "2", "1" ], [ "s", "w" ], [ "l", "m" ], [ "u", "r" ], [ "per", "ature" ], [ "tem", "perature" ], [ "ap", "p" ], [ "Ġ1", "00" ], [ "1", "3" ], [ "g", "ment" ], [ "con", "fig" ], [ "Ġ1", "0000" ], [ "fe", "ed" ], [ "Roberta", "For" ], [ "for", "ward" ], [ "al", "l" ], [ "un", "cased" ], [ "t", "he" ], [ "r", "elu" ], [ "id", "s" ], [ "A", "R" ], [ "2", "0" ], [ "m", "an" ], [ "O", "N" ], [ "Ġ2", "5" ], [ "app", "ly" ], [ "c", "ls" ], [ "for", "e" ], [ "be", "fore" ], [ "au", "gment" ], [ "Ġ1", "4" ], [ "Ġ502", "65" ], [ "Ġ", "9" ], [ "ng", "ram" ], [ "ga", "tive" ], [ "quan", "ti" ], [ "repe", "at" ], [ "D", "i" ], [ "ut", "o" ], [ "clas", "s" ], [ "m", "s" ], [ "zer", "o" ], [ "b", "float" ], [ "p", "u" ], [ "H", "e" ], [ "t", "ar" ], [ "r", "edu" ], [ "ne", "gative" ], [ "ar", "n" ], [ "redu", "ction" ], [ "th", "er" ], [ "ne", "t" ], [ "Ġ2", "8" ], [ "w", "en" ], [ "W", "av" ], [ "V", "ec" ], [ "Ġ1", "1" ], [ "con", "tr" ], [ "sil", "u" ], [ "ini", "ty" ], [ "a", "i" ], [ "o", "ther" ], [ "inf", "inity" ], [ "m", "t" ], [ "r", "ms" ], [ "b", "atch" ], [ "st", "able" ], [ "i", "mage" ], [ "sel", "ection" ], [ "an", "d" ], [ "He", "ad" ], [ "pat", "ch" ], [ "T", "C" ], [ "Ġ15", "00" ], [ "sp", "ace" ], [ "f", "t" ], [ "finetun", "ing" ], [ "set", "ting" ], [ "le", "arn" ], [ "12", "8" ], [ "setting", "s" ], [ "token", "izer" ], [ "the", "ta" ], [ "D", "P" ], [ "f", "ace" ], [ "f", "r" ], [ "oo", "k" ], [ "me", "an" ], [ "q", "a" ], [ "LM", "Head" ], [ "m", "u" ], [ "ct", "x" ], [ "D", "E" ], [ "b", "ook" ], [ "LMHead", "Model" ], [ "b", "art" ], [ "si", "ty" ], [ "Ġ32", "0" ], [ "ex", "p" ], [ "O", "C" ], [ "se", "q" ], [ "learn", "ing" ], [ "r", "a" ], [ "weight", "ed" ], [ "g", "ated" ], [ "00", "02" ], [ "Ġ\\\"", "_" ], [ "la", "ma" ], [ "l", "t" ], [ "T", "I" ], [ "vi", "sion" ], [ "channel", "s" ], [ "ForC", "TC" ], [ "u", "b" ], [ "8", "4" ], [ "P", "R" ], [ "ver", "sity" ], [ "b", "lock" ], [ "L", "OC" ], [ "B", "ad" ], [ "di", "versity" ], [ "Bad", "Di" ], [ "P", "ER" ], [ "face", "book" ], [ "epoch", "s" ], [ "OR", "G" ], [ "st", "eps" ], [ "m", "p" ], [ "Ġ15", "16" ], [ "negative", "s" ], [ "quanti", "zer" ], [ "codevector", "s" ], [ "as", "tive" ], [ "contr", "astive" ], [ "c", "al" ], [ "g", "eneration" ], [ "D", "DP" ], [ "DDP", "M" ], [ "Token", "Classification" ], [ "dat", "a" ], [ "D", "is" ], [ "p", "r" ], [ "dat", "ase" ], [ "A", "n" ], [ "Token", "izer" ], [ "id", "al" ], [ "so", "idal" ], [ "si", "nu" ], [ "sh", "ar" ], [ "sinu", "soidal" ], [ "0", "4" ], [ "Ġ", "-" ], [ "embd", "s" ], [ "_", "\\\":" ], [ "m", "o" ], [ "tr", "al" ], [ "Dis", "til" ], [ "e", "s" ], [ "C", "I" ], [ "Ġ1", "3" ], [ "ge", "t" ], [ "ue", "s" ], [ "checkpoin", "t" ], [ "A", "uto" ], [ "Ġ\\\"", "." ], [ "E", "D" ], [ "ali", "ty" ], [ "pre", "train" ], [ "2", "6" ], [ "BertFor", "MaskedLM" ], [ "ues", "tion" ], [ "b", "y" ], [ "S", "C" ], [ "an", "ce" ], [ "F", "AR" ], [ "ate", "s" ], [ "2", "8" ], [ "CI", "FAR" ], [ "3", "5" ], [ "lo", "cal" ], [ "se", "ed" ], [ "model", "s" ], [ "o", "ver" ], [ "fu", "sion" ], [ "m", "ode" ], [ "Q", "uestion" ], [ "di", "lation" ], [ "er", "ing" ], [ "1", "00" ], [ "is", "on" ], [ "U", "N" ], [ "ec", "tra" ], [ "g", "lo" ], [ "s", "ch" ], [ "id", "i" ], [ "An", "sw" ], [ "Ġ2", "48" ], [ "Answ", "ering" ], [ "Question", "Answering" ], [ "scal", "ing" ], [ "b", "al" ], [ "s", "av" ], [ "en", "v" ], [ "9", "5" ], [ "m", "b" ], [ "h", "o" ], [ "x", "lm" ], [ "\\\"", "}\"}" ], [ "glo", "bal" ], [ "m", "em" ], [ "l", "p" ], [ "s", "m" ], [ "so", "ft" ], [ "i", "ze" ], [ "M", "T" ], [ "2", "3" ], [ "Ġ3", "0" ], [ "b", "idi" ], [ "Ġ32", "128" ], [ "c", "u" ], [ "Ġ2", "00" ], [ "v", "ector" ], [ "token", "s" ], [ "P", "o" ], [ "sav", "e" ], [ "in", "put" ], [ "Ġ[", "[" ], [ "x", "vector" ], [ "]", "]," ], [ "ord", "er" ], [ "datase", "t" ], [ "q", "wen" ], [ "h", "y" ], [ "g", "le" ], [ "ca", "st" ], [ "Ġ32", "000" ], [ "w", "or" ], [ "up", "cast" ], [ "in", "ver" ], [ "dir", "ection" ], [ "id", "x" ], [ "re", "s" ], [ "direction", "ality" ], [ "/", "\\\"," ], [ "inver", "se" ], [ "re", "order" ], [ "B", "art" ], [ "tar", "get" ], [ "A", "D" ], [ "sm", "all" ], [ "mu", "l" ], [ "C", "on" ], [ "st", "ance" ], [ "h", "is" ], [ "A", "T" ], [ "2", "56" ], [ "ame", "ter" ], [ "par", "ameter" ], [ "Ġ2", "6" ], [ "N", "L" ], [ "x", "ls" ], [ "g", "oo" ], [ "shar", "e" ], [ "word", "s" ], [ "li", "p" ], [ "goo", "gle" ], [ "de", "f" ], [ "b", "ad" ], [ "m", "lp" ], [ "2", "7" ], [ "9", "9" ], [ "re", "su" ], [ "di", "stance" ], [ "b", "o" ], [ "his", "per" ], [ "NL", "P" ], [ "Ġ\\\"", "}}," ], [ "pretrain", "ing" ], [ "ar", "ian" ], [ "Ġ\\\".", "/" ], [ "Ġ[", "]," ], [ "3", "3" ], [ "l", "r" ], [ "wor", "k" ], [ "Ġ{", "}," ], [ "i", "ce" ], [ "p", "or" ], [ "ev", "al" ], [ "Ġ1516", "4" ], [ "au", "lt" ], [ "O", "DE" ], [ "ac", "k" ], [ "For", "SequenceClassification" ], [ "BadDi", "ff" ], [ "o", "p" ], [ "or", "y" ], [ "RobertaFor", "SequenceClassification" ], [ "g", "er" ], [ "lo", "ad" ], [ "k", "i" ], [ "parameter", "s" ], [ "f", "re" ], [ "def", "ault" ], [ "5", "3" ], [ "u", "ct" ], [ "l", "lama" ], [ "r", "en" ], [ "Ġ", "F" ], [ "hy", "per" ], [ "R", "ates" ], [ "f", "n" ], [ "Po", "ison" ], [ "BertFor", "TokenClassification" ], [ "Ġ5", "6" ], [ "Ġ", "G" ], [ "m", "ic" ], [ "tar", "y" ], [ "mem", "ory" ], [ "ro", "tary" ], [ "Ġ1", "8" ], [ "exp", "ert" ], [ "den", "se" ], [ "e", "f" ], [ "g", "ener" ], [ "mar", "ian" ], [ "er", "man" ], [ "M", "I" ], [ "5", "4" ], [ "M", "arian" ], [ "S", "I" ], [ "9", "96" ], [ "r", "el" ], [ "g", "g" ], [ "h", "ub" ], [ "ma", "p" ], [ "MT", "Model" ], [ "Marian", "MTModel" ], [ "3", "4" ], [ "on", "e" ], [ "summar", "ize" ], [ "sch", "edu" ], [ "ri", "ve" ], [ "u", "al" ], [ "ai", "l" ], [ "ren", "ch" ], [ "Bart", "ForConditionalGeneration" ], [ "ĠF", "rench" ], [ "si", "n" ], [ "b", "in" ], [ "0", "1" ], [ "sc", "r" ], [ "sw", "ish" ], [ "Ġ1", "9" ], [ "Ro", "man" ], [ "scr", "atch" ], [ "2", "9" ], [ "in", "e" ], [ "Ġ5", "0000" ], [ "S", "T" ], [ "X", "LM" ], [ "g", "o" ], [ "Ġ28", "996" ], [ "G", "en" ], [ "Q", "wen" ], [ "Ġ", "Roman" ], [ "ĠG", "erman" ], [ "ĠRoman", "ian" ], [ "t", "p" ], [ "Ġ2", "24" ], [ "p", "ri" ], [ "type", "s" ], [ "Distil", "BertForSequenceClassification" ], [ "6", "4" ], [ "it", "e" ], [ "5", "8" ], [ "c", "f" ], [ "m", "ini" ], [ "Ġ6", "0" ], [ "u", "s" ], [ "L", "lama" ], [ "m", "i" ], [ "pu", "s" ], [ "MI", "SC" ], [ "s", "par" ], [ "f", "p" ], [ "v", "it" ], [ "schedu", "le" ], [ "B", "lock" ], [ "en", "d" ], [ "Ġ3", "84" ], [ "m", "m" ], [ "u", "tral" ], [ "Con", "fig" ], [ "N", "E" ], [ "t", "e" ], [ "C", "PR" ], [ "ra", "in" ], [ "re", "start" ], [ "restart", "s" ], [ "el", "ectra" ], [ "w", "r" ], [ "mic", "ro" ], [ "o", "pus" ], [ "f", "fusion" ], [ "Ġ1", "000" ], [ "be", "ta" ], [ "m", "ple" ], [ "spar", "se" ], [ "micro", "soft" ], [ "la", "st" ], [ "Bert", "Model" ], [ "dow", "n" ], [ "H", "el" ], [ "Ġ2", "1" ], [ "S", "M" ], [ "u", "m" ], [ "sin", "ki" ], [ "Hel", "sinki" ], [ "ent", "ail" ], [ "entail", "ment" ], [ "a", "ge" ], [ "dev", "ice" ], [ "a", "y" ], [ "su", "p" ], [ "c", "nn" ], [ "expert", "s" ], [ "x", "sum" ], [ "i", "p" ], [ "Ġ248", "05" ], [ "u", "c" ], [ "Ġ512", "0" ], [ "5", "0" ], [ "wr", "ite" ], [ "pre", "ss" ], [ "Ġ6", "2" ], [ "over", "write" ], [ "7", "68" ], [ "Ġ3", "6" ], [ "Ġ14", "2" ], [ "mo", "e" ], [ "V", "E" ], [ "T", "rain" ], [ "p", "si" ], [ "BadDi", "ffusion" ], [ "Gen", "BadDiffusion" ], [ "BertFor", "QuestionAnswering" ], [ "cf", "g" ], [ "O", "P" ], [ "A", "L" ], [ "Ġ4", "48" ], [ "v", "p" ], [ "Auto", "Model" ], [ "ar", "y" ], [ "00", "1" ], [ "E", "S" ], [ "Ġ5", "00" ], [ "Ġ15", "36" ], [ "Ġ25", "0002" ], [ "inf", "er" ], [ "TI", "ON" ], [ "for", "ce" ], [ "re", "g" ], [ "sup", "press" ], [ "3", "0" ], [ "w", "hisper" ], [ "c", "ro" ], [ "ne", "utral" ], [ "t", "mp" ], [ "me", "as" ], [ "m", "it" ], [ "d", "ep" ], [ "con", "dition" ], [ "meas", "ure" ], [ "c", "l" ], [ "net", "work" ], [ "extr", "a" ], [ "3", "1" ], [ "14", "4" ], [ "form", "er" ], [ "sc", "ri" ], [ "n", "li" ], [ "mul", "ti" ], [ "b", "j" ], [ "RobertaFor", "MaskedLM" ], [ "0", "8" ], [ "ne", "ss" ], [ "av", "i" ], [ "se", "quence" ], [ "dep", "th" ], [ "I", "N" ], [ "Ġ1", "7" ], [ "un", "its" ], [ "19", "2" ], [ "e", "ch" ], [ "l", "ts" ], [ "s", "tic" ], [ "condition", "ing" ], [ "si", "mple" ], [ "t", "t" ], [ "resu", "lts" ], [ "p", "h" ], [ "Ġ10000", "00" ], [ "fre", "q" ], [ "m", "rope" ], [ "model", "ing" ], [ "Ġ4", "8" ], [ "str", "uct" ], [ "9", "4" ], [ "ter", "mini" ], [ "de", "termini" ], [ "Llama", "ForCausalLM" ], [ "determini", "stic" ], [ "c", "c" ], [ "v", "is" ], [ "Ġ\\\"", "\\\\" ], [ "fr", "ame" ], [ "h", "u" ], [ "ro", "ut" ], [ "ho", "me" ], [ "enc", "ode" ], [ "N", "e" ], [ "p", "y" ], [ "go", "al" ], [ "TI", "VE" ], [ "g", "u" ], [ "po", "st" ], [ "h", "ic" ], [ "o", "m" ], [ "E", "N" ], [ "c", "le" ], [ "Ġ", "\\\\" ], [ "Ġ128", "0" ], [ "con", "t" ], [ "t", "c" ], [ "Ġ32", "768" ], [ "is", "tral" ], [ "7", "5" ], [ "w", "n" ], [ "Ġ2", "3" ], [ "s", "lo" ], [ "Ġ3", "0000" ], [ "li", "mit" ], [ "b", "ack" ], [ "Ġ8", "192" ], [ "finetun", "ed" ], [ "Ġ8", "0" ], [ "a", "uto" ], [ "igh", "t" ], [ "ca", "u" ], [ "no", "t" ], [ "C", "ON" ], [ "P", "AR" ], [ "i", "l" ], [ "2", "02" ], [ "D", "o" ], [ "a", "st" ], [ "cau", "sal" ], [ "E", "l" ], [ "s", "ent" ], [ "m", "el" ], [ "si", "g" ], [ "I", "n" ], [ "q", "kv" ], [ "bin", "s" ], [ "Ġ2", "7" ], [ "en", "g" ], [ "Ġ8", "4" ], [ "3", "00" ], [ "ent", "able" ], [ "checkpoin", "ts" ], [ "op", "entable" ], [ "El", "ectra" ], [ "la", "mb" ], [ "I", "mage" ], [ "T", "R" ], [ "t", "ur" ], [ "Ġ3", "5" ], [ "m", "od" ], [ "lamb", "d" ], [ "c", "el" ], [ "inter", "val" ], [ "Ġ15164", "3" ], [ "h", "avi" ], [ "ra", "ti" ], [ "P", "O" ], [ "be", "havi" ], [ "6", "6" ], [ "ec", "i" ], [ "behavi", "or" ], [ "22", "4" ], [ "O", "F" ], [ "on", "g" ], [ "la", "ng" ], [ "cont", "ent" ], [ "t", "an" ], [ "causal", "m" ], [ "A", "N" ], [ "env", "s" ], [ "3", "8" ], [ "3", "84" ], [ "mt", "p" ], [ "}", "}\"}" ], [ "proj", "ect" ], [ "c", "lip" ], [ "ur", "ation" ], [ "ut", "h" ], [ "qu", "ad" ], [ "posi", "tive" ], [ "N", "O" ], [ "D", "e" ], [ "se", "s" ], [ "For", "Image" ], [ "ic", "al" ], [ "3", "7" ], [ "u", "le" ], [ "B", "O" ], [ "un", "k" ], [ "xls", "r" ], [ "p", "l" ], [ "per", "i" ], [ "sent", "ence" ], [ "tan", "h" ], [ "P", "ro" ], [ "su", "s" ], [ "so", "ur" ], [ "rout", "er" ], [ "ForImage", "Classification" ], [ "9", "8" ], [ "B", "ER" ], [ "al", "bert" ], [ "sc", "i" ], [ "ide", "o" ], [ "C", "h" ], [ "f", "ro" ], [ "Ġ6", "5" ], [ "back", "b" ], [ "block", "s" ], [ "BO", "X" ], [ "backb", "one" ], [ "m", "up" ], [ "Ġ1", "19" ], [ "de", "berta" ], [ "por", "t" ], [ "tr", "ig" ], [ "emb", "ert" ], [ "ga", "sus" ], [ "sour", "ce" ], [ "mi", "x" ], [ "par", "tial" ], [ "trig", "ger" ], [ "Ġ2", "2" ], [ "st", "ates" ], [ "Ġ26", "2" ], [ "struct", "ure" ], [ "h", "i" ], [ "clas", "ses" ], [ "ef", "fic" ], [ "au", "x" ], [ "O", "D" ], [ "st", "ep" ], [ "effic", "ient" ], [ "ti", "n" ], [ "d", "rive" ], [ "j", "o" ], [ "ing", "ual" ], [ "G", "N" ], [ "f", "l" ], [ "f", "o" ], [ "ar", "e" ], [ "war", "mup" ], [ "Ġ262", "144" ], [ "py", "torch" ], [ "9", "0" ], [ "ad", "i" ], [ "ma", "mb" ], [ "mamb", "a" ], [ "id", "th" ], [ "9", "7" ], [ "BER", "T" ], [ "w", "er" ], [ "Ġ502", "58" ], [ "Roberta", "Model" ], [ "quanti", "z" ], [ "cle", "an" ], [ "tr", "i" ], [ "Do", "wn" ], [ "Ġ15164", "5" ], [ "S", "DE" ], [ "g", "s" ], [ "r", "un" ], [ "gener", "ate" ], [ "tur", "n" ], [ "pad", "ding" ], [ "pr", "edi" ], [ "M", "E" ], [ "P", "re" ], [ "size", "s" ], [ ">", "\\\"," ], [ "w", "idth" ], [ "g", "in" ], [ "or", "e" ], [ "AT", "E" ], [ "quantiz", "ation" ], [ "V", "i" ], [ "C", "O" ], [ "M", "y" ], [ "on", "ly" ], [ "output", "s" ], [ "til", "ingual" ], [ "54", "7" ], [ "U", "M" ], [ "o", "f" ], [ "r", "on" ], [ "re", "turn" ], [ "shar", "ed" ], [ "Ġ3", "1" ], [ "mul", "tilingual" ], [ "SI", "GN" ], [ "For", "MaskedLM" ], [ "D", "rive" ], [ "P", "C" ], [ "s", "de" ], [ "Ġ5", "8" ], [ "g", "am" ], [ "F", "I" ], [ "v", "ideo" ], [ "rati", "o" ], [ "Ġ", "and" ], [ "adapter", "s" ], [ "A", "I" ], [ "U", "R" ], [ "Ġ13", "1" ], [ "7", "7" ], [ "N", "et" ], [ "s", "pa" ], [ "la", "p" ], [ "UN", "C" ], [ "CON", "J" ], [ "resu", "lt" ], [ "o", "pen" ], [ "in", "al" ], [ "s", "quad" ], [ "Ġ", "<" ], [ "at", "or" ], [ "datase", "ts" ], [ "H", "E" ], [ "b", "its" ], [ "e", "b" ], [ "Ġ131", "072" ], [ "tin", "y" ], [ "di", "o" ], [ "speci", "al" ], [ "co", "m" ], [ "re", "po" ], [ "Train", "ing" ], [ "PAR", "T" ], [ "Ġ119", "547" ], [ "spa", "tial" ], [ "_", "_" ], [ "re", "ad" ], [ "a", "uth" ], [ "be", "gin" ], [ "eci", "sion" ], [ "My", "Drive" ], [ "m", "er" ], [ "Auto", "Config" ], [ "ST", "OP" ], [ "Ġ50", "3" ], [ "a", "gg" ], [ "su", "al" ], [ "ke", "ep" ], [ "pr", "ecision" ], [ "mix", "ed" ], [ "d", "el" ], [ "er", "y" ], [ "po", "ison" ], [ "ac", "c" ], [ "g", "pu" ], [ "gener", "ated" ], [ "U", "p" ], [ "Ġ21", "128" ], [ "Y", "M" ], [ "a", "k" ], [ "00", "5" ], [ "sel", "f" ], [ "RobertaFor", "TokenClassification" ], [ "u", "x" ], [ "fi", "l" ], [ "pri", "v" ], [ "cel", "l" ], [ "un", "slo" ], [ "fro", "m" ], [ "priv", "ate" ], [ "unslo", "th" ], [ "4", "6" ], [ "4", "2" ], [ "pu", "sh" ], [ "V", "P" ], [ "ac", "cu" ], [ "d", "er" ], [ "attention", "s" ], [ "3", "36" ], [ "over", "lap" ], [ "c", "ar" ], [ "u", "l" ], [ "cro", "ss" ], [ "accu", "mu" ], [ "t", "able" ], [ "v", "l" ], [ "sta", "ge" ], [ "A", "l" ], [ "NE", "G" ], [ "4", "0" ], [ "N", "UM" ], [ "g", "en" ], [ "it", "h" ], [ "accumu", "lation" ], [ "h", "or" ], [ "sol", "ver" ], [ "h", "as" ], [ "UN", "I" ], [ "sig", "n" ], [ "gam", "ma" ], [ "e", "ze" ], [ "s", "ad" ], [ "un", "ed" ], [ "post", "fix" ], [ "F", "OR" ], [ "b", "all" ], [ "vi", "sual" ], [ "fc", "lip" ], [ "Ġ248", "04" ], [ "reg", "ation" ], [ "agg", "regation" ], [ "i", "m" ], [ "Ġ", "96" ], [ "la", "n" ], [ "sch", "ed" ], [ "bo", "tt" ], [ "in", "t" ], [ "se", "ction" ], [ "ma", "in" ], [ "adi", "ction" ], [ "o", "bj" ], [ "Ġ\\\"", "<" ], [ "em", "ical" ], [ "Ġ5", "18" ], [ "mod", "ule" ], [ "c", "i" ], [ "o", "t" ], [ "Ġ16", "384" ], [ "22", "0" ], [ "en", "gth" ], [ "layer", "norm" ], [ ".", "/" ], [ "W", "hisper" ], [ "ers", "on" ], [ "le", "av" ], [ "gr", "ap" ], [ "contr", "adiction" ], [ "or", "ig" ], [ "li", "fe" ], [ "Ġ502", "6" ], [ "Whisper", "ForConditionalGeneration" ], [ "A", "S" ], [ "M", "o" ], [ "R", "e" ], [ "str", "ength" ], [ "r", "u" ], [ "Ġ5", "7" ], [ "&", "_" ], [ "_", "&_" ], [ "ation", "al" ], [ "len", "eck" ], [ "resid", "ual" ], [ "scri", "pt" ], [ "Vi", "T" ], [ "CO", "M" ], [ "a", "e" ], [ "v", "el" ], [ "Ġ15", "19" ], [ "de", "ep" ], [ "bott", "leneck" ], [ "th", "od" ], [ "medi", "um" ], [ "vi", "ron" ], [ "config", "uration" ], [ "pr", "uned" ], [ "soft", "max" ], [ "H", "ED" ], [ "M", "ED" ], [ "a", "f" ], [ "p", "lay" ], [ "quan", "t" ], [ "SC", "HED" ], [ "Down", "Block" ], [ "module", "s" ], [ "P", "y" ], [ "si", "c" ], [ "Ġ2", "9" ], [ "viron", "ment" ], [ "b", "it" ], [ "u", "d" ], [ "ne", "o" ], [ "d", "uct" ], [ "k", "o" ], [ "or", "t" ], [ "sh", "a" ], [ "X", "ED" ], [ "sam", "pl" ], [ "Ġ14", "336" ], [ "infer", "ence" ], [ "Al", "bert" ], [ "to", "k" ], [ "torch", "script" ], [ "FI", "XED" ], [ "M", "istral" ], [ "ti", "tion" ], [ "V", "ER" ], [ "m", "is" ], [ "an", "ger" ], [ "t", "al" ], [ "fe", "ar" ], [ "\\\\", "\\\\" ], [ "al", "s" ], [ "ase", "d" ], [ "Ġ25", "0" ], [ "peri", "od" ], [ "fo", "od" ], [ "g", "an" ], [ "E", "n" ], [ "H", "u" ], [ "g", "ate" ], [ "00", "8" ], [ "pretrain", "ed" ], [ "ig", "n" ], [ "repe", "tition" ], [ "Ġ50000", "0" ], [ "UNI", "PC" ], [ "sad", "ness" ], [ "Ġ1519", "36" ], [ "4", "4" ], [ "re", "vision" ], [ "au", "dio" ], [ "A", "U" ], [ "c", "ol" ], [ "inter", "leav" ], [ "bi", "ased" ], [ "s", "y" ], [ "ver", "s" ], [ "no", "w" ], [ "5", "5" ], [ "l", "ong" ], [ "Ġ", "the" ], [ "orig", "inal" ], [ "ar", "gs" ], [ "2", "000" ], [ "en", "ti" ], [ "to", "m" ], [ "le", "vel" ], [ "Ġ25", "01" ], [ "OD", "EL" ], [ "Pre", "Training" ], [ "s", "er" ], [ "For", "QuestionAnswering" ], [ "T", "ar" ], [ "w", "ay" ], [ "li", "c" ], [ "ViT", "ForImageClassification" ], [ "M", "ODEL" ], [ "m", "nli" ], [ "mp", "net" ], [ "cro", "p" ], [ "PO", "SI" ], [ "Ġ518", "65" ], [ "ca", "m" ], [ "em", "o" ], [ "ff", "er" ], [ "jo", "y" ], [ "fil", "ter" ], [ "5", "2" ], [ "train", "ing" ], [ "por", "al" ], [ "NEG", "A" ], [ "p", "erson" ], [ "me", "thod" ], [ "sh", "i" ], [ "con", "vers" ], [ "15", "2" ], [ "Tar", "get" ], [ "i", "e" ], [ "ar", "d" ], [ "cod", "e" ], [ "RobertaFor", "QuestionAnswering" ], [ "res", "net" ], [ "fre", "eze" ], [ "S", "now" ], [ "T", "r" ], [ "i", "li" ], [ "pe", "gasus" ], [ "su", "r" ], [ "bu", "g" ], [ "tem", "poral" ], [ "hic", "s" ], [ "ball", "Target" ], [ "POSI", "TIVE" ], [ "Snow", "ballTarget" ], [ "i", "ty" ], [ "m", "bart" ], [ "an", "t" ], [ "iz", "on" ], [ "mer", "ge" ], [ "hor", "izon" ], [ "Ġ4", "3" ], [ "ram", "ids" ], [ "Py", "ramids" ], [ "Ġ2501", "12" ], [ "NEGA", "TIVE" ], [ "en", "vironment" ], [ "be", "st" ], [ "cam", "embert" ], [ "x", "l" ], [ "ver", "age" ], [ "qu", "ality" ], [ "de", "bug" ], [ "grap", "hics" ], [ "convers", "ational" ], [ "4", "3" ], [ "5", "9" ], [ "g", "ap" ], [ "pe", "ech" ], [ "bi", "o" ], [ "AD", "J" ], [ "hic", "le" ], [ "sum", "e" ], [ "qu", "i" ], [ "ss", "m" ], [ "re", "sume" ], [ "train", "er" ], [ "Ġ", "S" ], [ "Up", "Block" ], [ "or", "a" ], [ "lambd", "a" ], [ "i", "se" ], [ "ve", "hicle" ], [ "sur", "pri" ], [ "re", "ward" ], [ "32", "0" ], [ "bu", "ffer" ], [ "EN", "T" ], [ "u", "ction" ], [ "al", "o" ], [ "initial", "ize" ], [ "For", "TokenClassification" ], [ "lang", "u" ], [ "g", "em" ], [ "ans", "wer" ], [ "spe", "ech" ], [ "fix", "ed" ], [ "interleav", "ed" ], [ "langu", "age" ], [ "h", "f" ], [ "on", "ing" ], [ "he", "ight" ], [ "Ġ24804", "4" ], [ "7", "9" ], [ "8", "0" ], [ "q", "k" ], [ "ter", "n" ], [ "qu", "ery" ], [ "Ġ256", "0" ], [ "cu", "s" ], [ "m", "istral" ], [ "p", "la" ], [ "th", "read" ], [ "Ġ50", "36" ], [ "acc", "ep" ], [ "a", "verage" ], [ "Ġ3", "3" ], [ "lo", "cation" ], [ "AutoModel", "ForCausalLM" ], [ "eng", "ine" ], [ "surpri", "se" ], [ "min", "ator" ], [ "AD", "V" ], [ "Ġ19", "2" ], [ "AU", "X" ], [ "\\\"", "}}\"}" ], [ "pt", "ure" ], [ "hyper", "parameters" ], [ "behavior", "al" ], [ "Q", "U" ], [ "l", "ue" ], [ "in", "sic" ], [ "extr", "insic" ], [ "sp", "an" ], [ "Ġ\\\".", "./" ], [ "Ġ500", "5" ], [ "behavior", "s" ], [ "sign", "als" ], [ "7", "0" ], [ "g", "i" ], [ "ca", "pture" ], [ "Ġ3", "10" ], [ "cl", "oning" ], [ "De", "berta" ], [ "are", "as" ], [ "life", "time" ], [ "thread", "ed" ], [ "7", "2" ], [ "ec", "ut" ], [ "ex", "ecut" ], [ "co", "mm" ], [ "NO", "UN" ], [ "B", "i" ], [ "H", "I" ], [ "able", "s" ], [ "ph", "m" ], [ "M", "ini" ], [ "Ġ2", "0000" ], [ "di", "scri" ], [ "tem", "p" ], [ "discri", "minator" ], [ "Distil", "BertForMaskedLM" ], [ "open", "ai" ], [ "sha", "pe" ], [ "accep", "table" ], [ "ll", "m" ], [ "con", "str" ], [ "Bert", "Tokenizer" ], [ "execut", "ables" ], [ "Ġ", "of" ], [ "ad", "ditional" ], [ "del", "ta" ], [ "cus", "tom" ], [ "c", "os" ], [ "index", "es" ], [ "Ġ11", "008" ], [ "6", "0" ], [ "8", "8" ], [ "Ġ\\\"", "," ], [ "Ġ1", "152" ], [ "bi", "le" ], [ "ca", "p" ], [ "lo", "w" ], [ "DE", "T" ], [ "sequence", "s" ], [ "top", "k" ], [ "/", "." ], [ "O", "T" ], [ "p", "ct" ], [ "Ġ10000", "000" ], [ "fl", "at" ], [ "I", "S" ], [ "l", "n" ], [ "000", "3" ], [ "3", "9" ], [ "c", "or" ], [ "s", "ers" ], [ "gg", "ing" ], [ "t", "u" ], [ "Ġ50", "35" ], [ "op", "ti" ], [ "predi", "ct" ], [ "constr", "uction" ], [ "l", "ora" ], [ "m", "r" ], [ "q", "g" ], [ "t", "as" ], [ "PR", "ON" ], [ "PR", "OP" ], [ "E", "X" ], [ "o", "ff" ], [ "se", "ase" ], [ "Ġ128", "000" ], [ "10", "3" ], [ "bo", "t" ], [ "e", "qui" ], [ "si", "an" ], [ "val", "ent" ], [ "VER", "B" ], [ "equi", "valent" ], [ "0", "9" ], [ "c", "oder" ], [ "se", "p" ], [ "R", "O" ], [ "st", "ack" ], [ "su", "bj" ], [ "U", "S" ], [ "Ġ5", "95" ], [ "co", "la" ], [ "A", "DP" ], [ "b", "ig" ], [ "l", "u" ], [ "For", "PreTraining" ], [ "sampl", "ing" ], [ "PROP", "N" ], [ "C", "o" ], [ "b", "a" ], [ "c", "ated" ], [ "r", "d" ], [ "train", "ed" ], [ "Auto", "Train" ], [ "temp", "late" ], [ "5", "1" ], [ "S", "YM" ], [ "n", "i" ], [ "feat", "ures" ], [ "XLM", "RobertaForTokenClassification" ], [ "P", "UNC" ], [ "s", "a" ], [ "pro", "duct" ], [ "bin", "ary" ], [ "Ch", "emical" ], [ "Mistral", "ForCausalLM" ], [ "ch", "unk" ], [ "lo", "om" ], [ "vi", "ce" ], [ "Ġ23", "04" ], [ "A", "tt" ], [ "C", "CONJ" ], [ "m", "on" ], [ "den", "t" ], [ "lin", "ux" ], [ "ign", "ore" ], [ "m", "lm" ], [ "s", "v" ], [ "10", "1" ], [ "eb", "d" ], [ "N", "C" ], [ "b", "m" ], [ "}", "}}," ], [ "28", "8" ], [ "hu", "gging" ], [ "PUNC", "T" ], [ "7", "4" ], [ "E", "V" ], [ "i", "x" ], [ "p", "po" ], [ "Ġ3", "4" ], [ "co", "efficient" ], [ "c", "en" ], [ "o", "u" ], [ "s", "st" ], [ "un", "d" ], [ "000", "1" ], [ "depth", "s" ], [ "IN", "T" ], [ "n", "on" ], [ "F", "E" ], [ "i", "mp" ], [ "o", "us" ], [ "st", "em" ], [ "sk", "ip" ], [ "AT", "UR" ], [ "Att", "n" ], [ "S", "o" ], [ "Ġ5", "9" ], [ "F", "ast" ], [ "al", "th" ], [ "XLM", "RobertaForSequenceClassification" ], [ "um", "n" ], [ "HE", "AD" ], [ "col", "umn" ], [ "D", "ATE" ], [ "M", "ul" ], [ "ma", "il" ], [ "vo", "x" ], [ "lo", "ve" ], [ "co", "ef" ], [ "deep", "stack" ], [ "b", "r" ], [ "d", "u" ], [ "Ġ", "st" ], [ "Ġ{", "}}," ], [ "new", "s" ], [ "co", "mp" ], [ "Ġ248", "320" ], [ "gg", "y" ], [ "C", "am" ], [ "f", "ast" ], [ "eng", "lish" ], [ "span", "ish" ], [ "4", "9" ], [ "A", "TION" ], [ "lay", "out" ], [ "d", "edi" ], [ "u", "st" ], [ "st", "u" ], [ "OR", "A" ], [ "aux", "ili" ], [ "Mul", "ti" ], [ "dedi", "cated" ], [ "auxili", "ary" ], [ "Y", "P" ], [ "t", "ec" ], [ "norm", "al" ], [ "Ne", "o" ], [ "Electra", "ForSequenceClassification" ], [ "FE", "ATUR" ], [ "F", "T" ], [ "Ġ3", "9" ], [ "Ġ4", "6" ], [ "re", "pla" ], [ "bert", "s" ], [ "G", "E" ], [ "\\\"", "]" ], [ "Hu", "ggy" ], [ "Ġ43", "04" ], [ "-", "-" ], [ "8", "24" ], [ "H", "AT" ], [ "b", "er" ], [ "f", "alse" ], [ "Ġ\\\"", "[" ], [ "to", "tal" ], [ "N", "ame" ], [ "an", "e" ], [ "NE", "U" ], [ "F", "ED" ], [ "w", "s" ], [ "tr", "on" ], [ "em", "ent" ], [ "res", "sion" ], [ "FED", "ORA" ], [ "7", "3" ], [ "Ġ12", "288" ], [ "st", "s" ], [ "En", "coder" ], [ "Deberta", "V" ], [ "4", "5" ], [ "7", "8" ], [ "di", "ct" ], [ "Ġ502", "64" ], [ "GPT", "Neo" ], [ "gu", "st" ], [ "stu", "dent" ], [ "I", "C" ], [ "M", "P" ], [ "b", "s" ], [ "Ġ", "re" ], [ "val", "id" ], [ "layout", "lm" ], [ "O", "r" ], [ "c", "y" ], [ "n", "l" ], [ "x", "i" ], [ "se", "arch" ], [ "ac", "tion" ], [ "len", "der" ], [ "architect", "ure" ], [ "ap", "pro" ], [ "no", "ise" ], [ "cen", "ter" ], [ "A", "C" ], [ "P", "e" ], [ "proj", "ector" ], [ "long", "former" ], [ "7", "6" ], [ "ch", "ine" ], [ "an", "g" ], [ "fusion", "s" ], [ "INT", "J" ], [ "lender", "bot" ], [ "V", "L" ], [ "f", "er" ], [ "07", "5" ], [ "lin", "e" ] ] } }