{ | |
"version": "1.0", | |
"truncation": null, | |
"padding": null, | |
"added_tokens": [ | |
{ | |
"id": 0, | |
"content": "!", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 1, | |
"content": "<|startoftext|>", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": true | |
}, | |
{ | |
"id": 2, | |
"content": "<|endoftext|>", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": true | |
} | |
], | |
"normalizer": { | |
"type": "Sequence", | |
"normalizers": [ | |
{ | |
"type": "NFC" | |
}, | |
{ | |
"type": "Replace", | |
"pattern": { | |
"Regex": "\\s+" | |
}, | |
"content": " " | |
}, | |
{ | |
"type": "Lowercase" | |
} | |
] | |
}, | |
"pre_tokenizer": { | |
"type": "Sequence", | |
"pretokenizers": [ | |
{ | |
"type": "Split", | |
"pattern": { | |
"Regex": "'s|'t|'re|'ve|'m|'ll|'d|[\\p{L}]+|[\\p{N}]|[^\\s\\p{L}\\p{N}]+" | |
}, | |
"behavior": "Removed", | |
"invert": true | |
}, | |
{ | |
"type": "ByteLevel", | |
"add_prefix_space": false, | |
"trim_offsets": true, | |
"use_regex": true | |
} | |
] | |
}, | |
"post_processor": { | |
"type": "RobertaProcessing", | |
"sep": [ | |
"<|endoftext|>", | |
2 | |
], | |
"cls": [ | |
"<|startoftext|>", | |
1 | |
], | |
"trim_offsets": false, | |
"add_prefix_space": false | |
}, | |
"decoder": { | |
"type": "ByteLevel", | |
"add_prefix_space": true, | |
"trim_offsets": true, | |
"use_regex": true | |
}, | |
"model": { | |
"type": "BPE", | |
"dropout": null, | |
"unk_token": "<|endoftext|>", | |
"continuing_subword_prefix": "", | |
"end_of_word_suffix": "</w>", | |
"fuse_unk": false, | |
"vocab": { | |
"!": 0, | |
"<|startoftext|>": 1, | |
"<|endoftext|>": 2, | |
"\"": 3, | |
"#": 4, | |
"$": 5, | |
"%": 6, | |
"&": 7, | |
"'": 8, | |
"(": 9, | |
")": 10, | |
"*": 11, | |
"+": 12, | |
",": 13, | |
"-": 14, | |
".": 15, | |
"/": 16, | |
"0": 17, | |
"1": 18, | |
"2": 19, | |
"3": 20, | |
"4": 21, | |
"5": 22, | |
"6": 23, | |
"7": 24, | |
"8": 25, | |
"9": 26, | |
":": 27, | |
";": 28, | |
"<": 29, | |
"=": 30, | |
">": 31, | |
"?": 32, | |
"@": 33, | |
"[": 34, | |
"\\": 35, | |
"]": 36, | |
"^": 37, | |
"_": 38, | |
"`": 39, | |
"a": 40, | |
"b": 41, | |
"c": 42, | |
"d": 43, | |
"e": 44, | |
"f": 45, | |
"g": 46, | |
"h": 47, | |
"i": 48, | |
"j": 49, | |
"k": 50, | |
"l": 51, | |
"m": 52, | |
"n": 53, | |
"o": 54, | |
"p": 55, | |
"q": 56, | |
"r": 57, | |
"s": 58, | |
"t": 59, | |
"u": 60, | |
"v": 61, | |
"w": 62, | |
"x": 63, | |
"y": 64, | |
"z": 65, | |
"|": 66, | |
"}": 67, | |
"~": 68, | |
"¡": 69, | |
"¢": 70, | |
"£": 71, | |
"¤": 72, | |
"¥": 73, | |
"¦": 74, | |
"§": 75, | |
"¨": 76, | |
"©": 77, | |
"ª": 78, | |
"«": 79, | |
"¬": 80, | |
"®": 81, | |
"¯": 82, | |
"°": 83, | |
"±": 84, | |
"²": 85, | |
"³": 86, | |
"´": 87, | |
"µ": 88, | |
"¶": 89, | |
"·": 90, | |
"¸": 91, | |
"¹": 92, | |
"º": 93, | |
"»": 94, | |
"¼": 95, | |
"½": 96, | |
"¾": 97, | |
"¿": 98, | |
"Â": 99, | |
"Ã": 100, | |
"Ä": 101, | |
"Å": 102, | |
"Æ": 103, | |
"Ç": 104, | |
"È": 105, | |
"É": 106, | |
"Ê": 107, | |
"Ë": 108, | |
"Ì": 109, | |
"Í": 110, | |
"Î": 111, | |
"Ï": 112, | |
"Ð": 113, | |
"Ñ": 114, | |
"Ö": 115, | |
"×": 116, | |
"Ø": 117, | |
"Ù": 118, | |
"Ü": 119, | |
"à": 120, | |
"á": 121, | |
"â": 122, | |
"ã": 123, | |
"ä": 124, | |
"å": 125, | |
"æ": 126, | |
"ç": 127, | |
"è": 128, | |
"é": 129, | |
"ë": 130, | |
"ì": 131, | |
"ï": 132, | |
"Ģ": 133, | |
"ģ": 134, | |
"Ĥ": 135, | |
"ĥ": 136, | |
"Ħ": 137, | |
"ħ": 138, | |
"Ĩ": 139, | |
"ĩ": 140, | |
"Ī": 141, | |
"ī": 142, | |
"Ĭ": 143, | |
"ĭ": 144, | |
"Į": 145, | |
"į": 146, | |
"İ": 147, | |
"ı": 148, | |
"IJ": 149, | |
"ij": 150, | |
"Ĵ": 151, | |
"ĵ": 152, | |
"Ķ": 153, | |
"ķ": 154, | |
"ĸ": 155, | |
"Ĺ": 156, | |
"ĺ": 157, | |
"Ļ": 158, | |
"ļ": 159, | |
"Ľ": 160, | |
"ľ": 161, | |
"Ŀ": 162, | |
"ŀ": 163, | |
"Ł": 164, | |
"ł": 165, | |
"Ń": 166, | |
"e</w>": 167, | |
"n</w>": 168, | |
"s</w>": 169, | |
"o</w>": 170, | |
"g</w>": 171, | |
"a</w>": 172, | |
"r</w>": 173, | |
"t</w>": 174, | |
"w</w>": 175, | |
"d</w>": 176, | |
"y</w>": 177, | |
"i</w>": 178, | |
"p</w>": 179, | |
"l</w>": 180, | |
"h</w>": 181, | |
"f</w>": 182, | |
"k</w>": 183, | |
"c</w>": 184, | |
"v</w>": 185, | |
"m</w>": 186, | |
"x</w>": 187, | |
"z</w>": 188, | |
"u</w>": 189, | |
"ľ</w>": 190, | |
"[</w>": 191, | |
"ģ</w>": 192, | |
"|</w>": 193, | |
"¼</w>": 194, | |
"j</w>": 195, | |
"į</w>": 196, | |
"²</w>": 197, | |
"b</w>": 198, | |
"ĩ</w>": 199, | |
"Ļ</w>": 200, | |
"¿</w>": 201, | |
"¡</w>": 202, | |
"'</w>": 203, | |
"></w>": 204, | |
"¢</w>": 205, | |
"±</w>": 206, | |
"·</w>": 207, | |
"¶</w>": 208, | |
"«</w>": 209, | |
"¤</w>": 210, | |
"©</w>": 211, | |
"½</w>": 212, | |
"³</w>": 213, | |
"Ń</w>": 214, | |
"\\</w>": 215, | |
"+</w>": 216, | |
"¸</w>": 217, | |
"¨</w>": 218, | |
"Ł</w>": 219, | |
"ĺ</w>": 220, | |
"Ľ</w>": 221, | |
"¹</w>": 222, | |
"Ĩ</w>": 223, | |
"Ģ</w>": 224, | |
"Ĺ</w>": 225, | |
"°</w>": 226, | |
"-</w>": 227, | |
"Ĥ</w>": 228, | |
"Į</w>": 229, | |
"ħ</w>": 230, | |
"Ĭ</w>": 231, | |
"§</w>": 232, | |
"IJ</w>": 233, | |
"`</w>": 234, | |
"q</w>": 235, | |
"ķ</w>": 236, | |
"µ</w>": 237, | |
"ī</w>": 238, | |
"º</w>": 239, | |
"!</w>": 240, | |
"¯</w>": 241, | |
"Ŀ</w>": 242, | |
"Ħ</w>": 243, | |
"ł</w>": 244, | |
"%</w>": 245, | |
"£</w>": 246, | |
"¦</w>": 247, | |
"ŀ</w>": 248, | |
"¾</w>": 249, | |
"´</w>": 250, | |
"ĵ</w>": 251, | |
"^</w>": 252, | |
"Ķ</w>": 253, | |
"ª</w>": 254, | |
"»</w>": 255, | |
"0</w>": 256, | |
"5</w>": 257, | |
"ĸ</w>": 258, | |
"ļ</w>": 259, | |
"İ</w>": 260, | |
"=</w>": 261, | |
"Ĵ</w>": 262, | |
"Ī</w>": 263, | |
"ĥ</w>": 264, | |
"¥</w>": 265, | |
"®</w>": 266, | |
"3</w>": 267, | |
"¬</w>": 268, | |
".</w>": 269, | |
"1</w>": 270, | |
"ĭ</w>": 271, | |
"ij</w>": 272, | |
"@</w>": 273, | |
"&</w>": 274, | |
"ı</w>": 275, | |
"~</w>": 276, | |
"8</w>": 277, | |
"}</w>": 278, | |
"*</w>": 279, | |
";</w>": 280, | |
"\"</w>": 281, | |
"2</w>": 282, | |
"(</w>": 283, | |
")</w>": 284, | |
"4</w>": 285, | |
"9</w>": 286, | |
"/</w>": 287, | |
":</w>": 288, | |
"#</w>": 289, | |
",</w>": 290, | |
"$</w>": 291, | |
"]</w>": 292, | |
"<</w>": 293, | |
"_</w>": 294, | |
"?</w>": 295, | |
"6</w>": 296, | |
"7</w>": 297, | |
"th": 298, | |
"the</w>": 299, | |
"in": 300, | |
"an": 301, | |
"ed</w>": 302, | |
"er": 303, | |
"re": 304, | |
"ar": 305, | |
"ti": 306, | |
"on": 307, | |
"en": 308, | |
"of</w>": 309, | |
"or": 310, | |
"and</w>": 311, | |
"er</w>": 312, | |
"on</w>": 313, | |
"in</w>": 314, | |
"ing</w>": 315, | |
"st": 316, | |
"ro": 317, | |
"al": 318, | |
"it": 319, | |
"to</w>": 320, | |
"as</w>": 321, | |
"at": 322, | |
"es</w>": 323, | |
"ou": 324, | |
"hi": 325, | |
"ac": 326, | |
"si": 327, | |
"at</w>": 328, | |
"ri": 329, | |
"al</w>": 330, | |
"el": 331, | |
"an</w>": 332, | |
"am": 333, | |
"or</w>": 334, | |
"st</w>": 335, | |
"li": 336, | |
"ur": 337, | |
"ec": 338, | |
"om": 339, | |
"di": 340, | |
"was</w>": 341, | |
"ly</w>": 342, | |
"en</w>": 343, | |
"ea": 344, | |
"ch": 345, | |
"un": 346, | |
"tion</w>": 347, | |
"la": 348, | |
"is</w>": 349, | |
"fi": 350, | |
"ol": 351, | |
"de": 352, | |
"-@</w>": 353, | |
"@-@</w>": 354, | |
"ra": 355, | |
"vi": 356, | |
"le</w>": 357, | |
"lo": 358, | |
"sh": 359, | |
"em": 360, | |
"be": 361, | |
"that</w>": 362, | |
"'s</w>": 363, | |
"con": 364, | |
"ma": 365, | |
"for</w>": 366, | |
"ha": 367, | |
"su": 368, | |
"by</w>": 369, | |
"ith</w>": 370, | |
"ve</w>": 371, | |
"with</w>": 372, | |
"se</w>": 373, | |
"ch</w>": 374, | |
"the": 375, | |
"ent": 376, | |
"po": 377, | |
"ce</w>": 378, | |
"il": 379, | |
"se": 380, | |
"ent</w>": 381, | |
"le": 382, | |
"com": 383, | |
"sp": 384, | |
"ere</w>": 385, | |
"pro": 386, | |
"no": 387, | |
"bu": 388, | |
"wh": 389, | |
"it</w>": 390, | |
"th</w>": 391, | |
"ver": 392, | |
"ne": 393, | |
"ca": 394, | |
"is": 395, | |
"for": 396, | |
"ag": 397, | |
"ers</w>": 398, | |
"mo": 399, | |
"gh": 400, | |
"fro": 401, | |
"ted</w>": 402, | |
"from</w>": 403, | |
"tion": 404, | |
"op": 405, | |
"his</w>": 406, | |
"ad": 407, | |
"ab": 408, | |
"ic": 409, | |
"he</w>": 410, | |
"oun": 411, | |
"as": 412, | |
"ts</w>": 413, | |
"sc": 414, | |
"de</w>": 415, | |
"ow": 416, | |
"ex": 417, | |
"whi": 418, | |
"ru": 419, | |
"ter</w>": 420, | |
"ap": 421, | |
"ds</w>": 422, | |
"were</w>": 423, | |
"pre": 424, | |
"du": 425, | |
"gu": 426, | |
"par": 427, | |
"ir": 428, | |
"bo": 429, | |
"ther</w>": 430, | |
"qu": 431, | |
"lu": 432, | |
"ter": 433, | |
"tw": 434, | |
"es": 435, | |
"rec": 436, | |
"per": 437, | |
"ta": 438, | |
"ate</w>": 439, | |
"ver</w>": 440, | |
"ated</w>": 441, | |
"ding</w>": 442, | |
"ity</w>": 443, | |
"man": 444, | |
"ear": 445, | |
"sed</w>": 446, | |
"ded</w>": 447, | |
"au": 448, | |
"all</w>": 449, | |
"ame</w>": 450, | |
"ci": 451, | |
"one</w>": 452, | |
"ing": 453, | |
"are</w>": 454, | |
"af": 455, | |
"ir</w>": 456, | |
"ation</w>": 457, | |
"âĢ": 458, | |
"had</w>": 459, | |
"tr": 460, | |
"ul": 461, | |
"ld</w>": 462, | |
"which</w>": 463, | |
"wa": 464, | |
"im": 465, | |
"lea": 466, | |
"be</w>": 467, | |
"to": 468, | |
"tim": 469, | |
"fir": 470, | |
"wor": 471, | |
"ong</w>": 472, | |
"por": 473, | |
"mar": 474, | |
"me": 475, | |
"ally</w>": 476, | |
"so</w>": 477, | |
"out</w>": 478, | |
"tions</w>": 479, | |
"its</w>": 480, | |
"gh</w>": 481, | |
"ge</w>": 482, | |
"ber</w>": 483, | |
"fe": 484, | |
"pu": 485, | |
"ser": 486, | |
"der": 487, | |
"pl": 488, | |
"ss</w>": 489, | |
"ine</w>": 490, | |
"inc": 491, | |
"mi": 492, | |
"ght</w>": 493, | |
"go": 494, | |
"this</w>": 495, | |
"tur": 496, | |
"da": 497, | |
"rou": 498, | |
"but</w>": 499, | |
"um": 500, | |
"son</w>": 501, | |
"we": 502, | |
"ved</w>": 503, | |
"sion</w>": 504, | |
"ke</w>": 505, | |
"pla": 506, | |
"their</w>": 507, | |
"ies</w>": 508, | |
"first</w>": 509, | |
"sa": 510, | |
"oc": 511, | |
"att": 512, | |
"of": 513, | |
"pe": 514, | |
"not</w>": 515, | |
"gi": 516, | |
"na": 517, | |
"ary</w>": 518, | |
"mu": 519, | |
"led</w>": 520, | |
"âĢĵ</w>": 521, | |
"her</w>": 522, | |
"ran": 523, | |
"co": 524, | |
"they</w>": 525, | |
"der</w>": 526, | |
"ali": 527, | |
"also</w>": 528, | |
"ore</w>": 529, | |
"ep": 530, | |
"ould</w>": 531, | |
"after</w>": 532, | |
"shi": 533, | |
"us</w>": 534, | |
"et</w>": 535, | |
"tic": 536, | |
"stor": 537, | |
"wi": 538, | |
"ev": 539, | |
"other</w>": 540, | |
"sh</w>": 541, | |
"ting</w>": 542, | |
"ard</w>": 543, | |
"te": 544, | |
"two</w>": 545, | |
"ni": 546, | |
"have</w>": 547, | |
"our": 548, | |
"comm": 549, | |
"te</w>": 550, | |
"ack</w>": 551, | |
"oo": 552, | |
"fin": 553, | |
"sec": 554, | |
"ents</w>": 555, | |
"has</w>": 556, | |
"comp": 557, | |
"bec": 558, | |
"ks</w>": 559, | |
"cont": 560, | |
"land</w>": 561, | |
"been</w>": 562, | |
"ence</w>": 563, | |
"king</w>": 564, | |
"el</w>": 565, | |
"age</w>": 566, | |
"low": 567, | |
"min": 568, | |
".@</w>": 569, | |
"@.@</w>": 570, | |
"ome</w>": 571, | |
"ment</w>": 572, | |
"char": 573, | |
"ge": 574, | |
"ater</w>": 575, | |
"nor": 576, | |
"ho": 577, | |
"ous</w>": 578, | |
"who</w>": 579, | |
"ear</w>": 580, | |
"spec": 581, | |
"col": 582, | |
"ely</w>": 583, | |
"ty</w>": 584, | |
"jo": 585, | |
"uring</w>": 586, | |
"duc": 587, | |
"bri": 588, | |
"str": 589, | |
"can": 590, | |
"ori": 591, | |
"tra": 592, | |
"pa": 593, | |
"she</w>": 594, | |
"do": 595, | |
"tive</w>": 596, | |
"mon": 597, | |
"new</w>": 598, | |
"rit": 599, | |
"time</w>": 600, | |
"ons</w>": 601, | |
"so": 602, | |
"man</w>": 603, | |
"dec": 604, | |
"cent": 605, | |
"lan": 606, | |
"pi": 607, | |
"our</w>": 608, | |
"inter": 609, | |
"fer": 610, | |
"gra": 611, | |
"gre": 612, | |
"res</w>": 613, | |
"inclu": 614, | |
"mil": 615, | |
"during</w>": 616, | |
"own</w>": 617, | |
"pres": 618, | |
"ju": 619, | |
"ned</w>": 620, | |
"ell</w>": 621, | |
",@</w>": 622, | |
"@,@</w>": 623, | |
"ite</w>": 624, | |
"gen": 625, | |
"when</w>": 626, | |
"sig": 627, | |
"bi": 628, | |
"ren": 629, | |
"fa": 630, | |
"ga": 631, | |
"play": 632, | |
"eng": 633, | |
"tional</w>": 634, | |
"ound</w>": 635, | |
"thou": 636, | |
"more</w>": 637, | |
"ree</w>": 638, | |
"ember</w>": 639, | |
"ei": 640, | |
"sou": 641, | |
"sur": 642, | |
"sti": 643, | |
"car": 644, | |
"form": 645, | |
"lar": 646, | |
"ses</w>": 647, | |
"ten": 648, | |
"into</w>": 649, | |
"tu": 650, | |
"ces</w>": 651, | |
"most</w>": 652, | |
"ked</w>": 653, | |
"way</w>": 654, | |
"cre": 655, | |
"coun": 656, | |
"up</w>": 657, | |
"les</w>": 658, | |
"ace</w>": 659, | |
"als</w>": 660, | |
"ke": 661, | |
"would</w>": 662, | |
"ant</w>": 663, | |
"ber": 664, | |
"fu": 665, | |
"ited</w>": 666, | |
"pri": 667, | |
"while</w>": 668, | |
"over</w>": 669, | |
"ings</w>": 670, | |
"re</w>": 671, | |
"fil": 672, | |
"sy": 673, | |
"est": 674, | |
"able</w>": 675, | |
"wn</w>": 676, | |
"sea": 677, | |
"ach": 678, | |
"sing</w>": 679, | |
"ins</w>": 680, | |
"tic</w>": 681, | |
"id</w>": 682, | |
"only</w>": 683, | |
"ates</w>": 684, | |
"tri": 685, | |
"ving</w>": 686, | |
"ba": 687, | |
"vel": 688, | |
"ance</w>": 689, | |
"sta": 690, | |
"ern</w>": 691, | |
"fol": 692, | |
"een</w>": 693, | |
"ined</w>": 694, | |
"stru": 695, | |
"uni": 696, | |
"game</w>": 697, | |
"lar</w>": 698, | |
"sel": 699, | |
"bli": 700, | |
"used</w>": 701, | |
"ning</w>": 702, | |
"ps</w>": 703, | |
"ties</w>": 704, | |
"kno": 705, | |
"cor": 706, | |
"ft</w>": 707, | |
"recor": 708, | |
"ble</w>": 709, | |
"vie": 710, | |
"ys</w>": 711, | |
"wil": 712, | |
"ical</w>": 713, | |
"app": 714, | |
"tro": 715, | |
"three</w>": 716, | |
"cla": 717, | |
"old</w>": 718, | |
"shed</w>": 719, | |
"hea": 720, | |
"about</w>": 721, | |
"writ": 722, | |
"than</w>": 723, | |
"ste": 724, | |
"later</w>": 725, | |
"ari": 726, | |
"dy</w>": 727, | |
"publi": 728, | |
"loc": 729, | |
"aga": 730, | |
"throu": 731, | |
"ssi": 732, | |
"end</w>": 733, | |
"may</w>": 734, | |
"ang": 735, | |
"ach</w>": 736, | |
"ves</w>": 737, | |
"og": 738, | |
"him</w>": 739, | |
"betw": 740, | |
"though</w>": 741, | |
"between</w>": 742, | |
"um</w>": 743, | |
"star": 744, | |
"scri": 745, | |
"rea": 746, | |
"ond</w>": 747, | |
"ship</w>": 748, | |
"ok</w>": 749, | |
"hel": 750, | |
"song</w>": 751, | |
"chi": 752, | |
"cap": 753, | |
"ever</w>": 754, | |
"day</w>": 755, | |
"cri": 756, | |
"some</w>": 757, | |
"bro": 758, | |
"no</w>": 759, | |
"there</w>": 760, | |
"ans</w>": 761, | |
"all": 762, | |
"num": 763, | |
"red</w>": 764, | |
"ears</w>": 765, | |
"sts</w>": 766, | |
"any</w>": 767, | |
"war": 768, | |
"ph": 769, | |
"pp": 770, | |
"gin": 771, | |
"struc": 772, | |
"amer": 773, | |
"produc": 774, | |
"sch": 775, | |
"ces": 776, | |
"ure</w>": 777, | |
"ating</w>": 778, | |
"emp": 779, | |
"tor": 780, | |
"season</w>": 781, | |
"fore</w>": 782, | |
"ic</w>": 783, | |
"city</w>": 784, | |
"gro": 785, | |
"follow": 786, | |
"sub": 787, | |
"bel": 788, | |
"year</w>": 789, | |
"can</w>": 790, | |
"sin": 791, | |
"where</w>": 792, | |
"and": 793, | |
"made</w>": 794, | |
"relea": 795, | |
"sm": 796, | |
"bl": 797, | |
"ten</w>": 798, | |
"with": 799, | |
"son": 800, | |
"many</w>": 801, | |
"are": 802, | |
"ed": 803, | |
"how": 804, | |
"americ": 805, | |
"ury</w>": 806, | |
"stu": 807, | |
"musi": 808, | |
"cu": 809, | |
"nam": 810, | |
"ement</w>": 811, | |
"such</w>": 812, | |
"albu": 813, | |
"buil": 814, | |
"before</w>": 815, | |
"ef": 816, | |
"arm": 817, | |
"ton</w>": 818, | |
"them</w>": 819, | |
"cal": 820, | |
"bar": 821, | |
"des</w>": 822, | |
"mat": 823, | |
"gener": 824, | |
"od</w>": 825, | |
"series</w>": 826, | |
"cer": 827, | |
"sho": 828, | |
"enti": 829, | |
"her": 830, | |
"over": 831, | |
"ann": 832, | |
"well</w>": 833, | |
"world</w>": 834, | |
"gan</w>": 835, | |
"est</w>": 836, | |
"second</w>": 837, | |
"ters</w>": 838, | |
"side</w>": 839, | |
"tran": 840, | |
"line</w>": 841, | |
"ture</w>": 842, | |
"port</w>": 843, | |
"being</w>": 844, | |
"years</w>": 845, | |
"both</w>": 846, | |
"indi": 847, | |
"these</w>": 848, | |
"national</w>": 849, | |
"histor": 850, | |
"fe</w>": 851, | |
"vo": 852, | |
"sted</w>": 853, | |
"ani": 854, | |
"bas": 855, | |
"poin": 856, | |
"sing": 857, | |
"film</w>": 858, | |
"pen": 859, | |
"sup": 860, | |
"mis": 861, | |
"cro": 862, | |
"stri": 863, | |
"lin": 864, | |
"tre": 865, | |
"war</w>": 866, | |
"however</w>": 867, | |
"ying</w>": 868, | |
"ling</w>": 869, | |
"yp": 870, | |
"ected</w>": 871, | |
"direc": 872, | |
"vision</w>": 873, | |
"album</w>": 874, | |
"then</w>": 875, | |
"ll</w>": 876, | |
"sever": 877, | |
"through</w>": 878, | |
"known</w>": 879, | |
"bor": 880, | |
"cul": 881, | |
"clu": 882, | |
"ster</w>": 883, | |
"south</w>": 884, | |
"ry</w>": 885, | |
"ect</w>": 886, | |
"low</w>": 887, | |
"pr": 888, | |
"sk": 889, | |
"iso": 890, | |
"north</w>": 891, | |
"part</w>": 892, | |
"fac": 893, | |
"tly</w>": 894, | |
"peri": 895, | |
"eu": 896, | |
"batt": 897, | |
"state</w>": 898, | |
"ced</w>": 899, | |
"consi": 900, | |
"inf": 901, | |
"poli": 902, | |
"olog": 903, | |
"early</w>": 904, | |
"posi": 905, | |
"ames</w>": 906, | |
"win": 907, | |
"devel": 908, | |
"ob": 909, | |
"ve": 910, | |
"ven</w>": 911, | |
"oper": 912, | |
"ger": 913, | |
"offi": 914, | |
"charac": 915, | |
"ms</w>": 916, | |
"high": 917, | |
"ad</w>": 918, | |
"tho": 919, | |
"several</w>": 920, | |
"dre": 921, | |
"descri": 922, | |
"ale</w>": 923, | |
"number</w>": 924, | |
"air": 925, | |
"including</w>": 926, | |
"inst</w>": 927, | |
"against</w>": 928, | |
"ls</w>": 929, | |
"sul": 930, | |
"episo": 931, | |
"cam": 932, | |
"dif": 933, | |
"soci": 934, | |
"became</w>": 935, | |
"like</w>": 936, | |
"tel": 937, | |
"four</w>": 938, | |
"âĢĶ</w>": 939, | |
"hou": 940, | |
"joh": 941, | |
"united</w>": 942, | |
"inv": 943, | |
"under</w>": 944, | |
"nov": 945, | |
"tiv": 946, | |
"suc": 947, | |
"ations</w>": 948, | |
"ack": 949, | |
"tor</w>": 950, | |
"ron": 951, | |
"und</w>": 952, | |
"ws</w>": 953, | |
"fo": 954, | |
"gr": 955, | |
"develop": 956, | |
"although</w>": 957, | |
"contin": 958, | |
"west</w>": 959, | |
"origin": 960, | |
"music</w>": 961, | |
"ors</w>": 962, | |
"don</w>": 963, | |
"century</w>": 964, | |
"ward</w>": 965, | |
"work</w>": 966, | |
"me</w>": 967, | |
"ami": 968, | |
"cha": 969, | |
"very</w>": 970, | |
"har": 971, | |
"dis": 972, | |
"zed</w>": 973, | |
"do</w>": 974, | |
"gs</w>": 975, | |
"tow": 976, | |
"sol": 977, | |
"following</w>": 978, | |
"lion</w>": 979, | |
"rema": 980, | |
"ns</w>": 981, | |
"tish</w>": 982, | |
"chur": 983, | |
"som": 984, | |
"mp": 985, | |
"tle</w>": 986, | |
"gover": 987, | |
"del": 988, | |
"comple": 989, | |
"cur": 990, | |
"use</w>": 991, | |
"back</w>": 992, | |
"hu": 993, | |
"stern</w>": 994, | |
"began</w>": 995, | |
"fiel": 996, | |
"ause</w>": 997, | |
"dra": 998, | |
"pas": 999, | |
"bil": 1000, | |
"cation</w>": 1001, | |
"dent</w>": 1002, | |
"bed</w>": 1003, | |
"because</w>": 1004, | |
"ant": 1005, | |
"eam</w>": 1006, | |
"phi": 1007, | |
"yo": 1008, | |
"continu": 1009, | |
"tain</w>": 1010, | |
"try</w>": 1011, | |
"fre": 1012, | |
"peop": 1013, | |
"called</w>": 1014, | |
"found</w>": 1015, | |
"episode</w>": 1016, | |
"desig": 1017, | |
"mor": 1018, | |
"set</w>": 1019, | |
"ley</w>": 1020, | |
"east</w>": 1021, | |
"trac": 1022, | |
"cra": 1023 | |
}, | |
"merges": [ | |
"t h", | |
"th e</w>", | |
"i n", | |
"a n", | |
"e d</w>", | |
"e r", | |
"r e", | |
"a r", | |
"t i", | |
"o n", | |
"e n", | |
"o f</w>", | |
"o r", | |
"an d</w>", | |
"e r</w>", | |
"o n</w>", | |
"i n</w>", | |
"in g</w>", | |
"s t", | |
"r o", | |
"a l", | |
"i t", | |
"t o</w>", | |
"a s</w>", | |
"a t", | |
"e s</w>", | |
"o u", | |
"h i", | |
"a c", | |
"s i", | |
"a t</w>", | |
"r i", | |
"a l</w>", | |
"e l", | |
"a n</w>", | |
"a m", | |
"o r</w>", | |
"s t</w>", | |
"l i", | |
"u r", | |
"e c", | |
"o m", | |
"d i", | |
"w as</w>", | |
"l y</w>", | |
"e n</w>", | |
"e a", | |
"c h", | |
"u n", | |
"ti on</w>", | |
"l a", | |
"i s</w>", | |
"f i", | |
"o l", | |
"d e", | |
"- @</w>", | |
"@ -@</w>", | |
"r a", | |
"v i", | |
"l e</w>", | |
"l o", | |
"s h", | |
"e m", | |
"b e", | |
"th at</w>", | |
"' s</w>", | |
"c on", | |
"m a", | |
"f or</w>", | |
"h a", | |
"s u", | |
"b y</w>", | |
"it h</w>", | |
"v e</w>", | |
"w ith</w>", | |
"s e</w>", | |
"c h</w>", | |
"th e", | |
"en t", | |
"p o", | |
"c e</w>", | |
"i l", | |
"s e", | |
"en t</w>", | |
"l e", | |
"c om", | |
"s p", | |
"er e</w>", | |
"p ro", | |
"n o", | |
"b u", | |
"w h", | |
"i t</w>", | |
"t h</w>", | |
"v er", | |
"n e", | |
"c a", | |
"i s", | |
"f or", | |
"a g", | |
"er s</w>", | |
"m o", | |
"g h", | |
"f ro", | |
"t ed</w>", | |
"fro m</w>", | |
"ti on", | |
"o p", | |
"hi s</w>", | |
"a d", | |
"a b", | |
"i c", | |
"h e</w>", | |
"ou n", | |
"a s", | |
"t s</w>", | |
"s c", | |
"d e</w>", | |
"o w", | |
"e x", | |
"w hi", | |
"r u", | |
"t er</w>", | |
"a p", | |
"d s</w>", | |
"w ere</w>", | |
"p re", | |
"d u", | |
"g u", | |
"p ar", | |
"i r", | |
"b o", | |
"th er</w>", | |
"q u", | |
"l u", | |
"t er", | |
"t w", | |
"e s", | |
"re c", | |
"p er", | |
"t a", | |
"at e</w>", | |
"v er</w>", | |
"at ed</w>", | |
"d ing</w>", | |
"it y</w>", | |
"m an", | |
"e ar", | |
"s ed</w>", | |
"d ed</w>", | |
"a u", | |
"al l</w>", | |
"am e</w>", | |
"c i", | |
"on e</w>", | |
"in g", | |
"ar e</w>", | |
"a f", | |
"i r</w>", | |
"a tion</w>", | |
"â Ģ", | |
"ha d</w>", | |
"t r", | |
"u l", | |
"l d</w>", | |
"whi ch</w>", | |
"w a", | |
"i m", | |
"l ea", | |
"b e</w>", | |
"t o", | |
"ti m", | |
"fi r", | |
"w or", | |
"on g</w>", | |
"p or", | |
"m ar", | |
"m e", | |
"al ly</w>", | |
"s o</w>", | |
"ou t</w>", | |
"tion s</w>", | |
"it s</w>", | |
"g h</w>", | |
"g e</w>", | |
"b er</w>", | |
"f e", | |
"p u", | |
"s er", | |
"d er", | |
"p l", | |
"s s</w>", | |
"in e</w>", | |
"in c", | |
"m i", | |
"gh t</w>", | |
"g o", | |
"th is</w>", | |
"t ur", | |
"d a", | |
"ro u", | |
"bu t</w>", | |
"u m", | |
"s on</w>", | |
"w e", | |
"v ed</w>", | |
"si on</w>", | |
"k e</w>", | |
"p la", | |
"the ir</w>", | |
"i es</w>", | |
"fir st</w>", | |
"s a", | |
"o c", | |
"at t", | |
"o f", | |
"p e", | |
"no t</w>", | |
"g i", | |
"n a", | |
"ar y</w>", | |
"m u", | |
"l ed</w>", | |
"âĢ ĵ</w>", | |
"h er</w>", | |
"r an", | |
"c o", | |
"the y</w>", | |
"d er</w>", | |
"al i", | |
"al so</w>", | |
"or e</w>", | |
"e p", | |
"ou ld</w>", | |
"af ter</w>", | |
"s hi", | |
"u s</w>", | |
"e t</w>", | |
"ti c", | |
"st or", | |
"w i", | |
"e v", | |
"o ther</w>", | |
"s h</w>", | |
"t ing</w>", | |
"ar d</w>", | |
"t e", | |
"tw o</w>", | |
"n i", | |
"ha ve</w>", | |
"ou r", | |
"com m", | |
"t e</w>", | |
"ac k</w>", | |
"o o", | |
"f in", | |
"s ec", | |
"ent s</w>", | |
"h as</w>", | |
"com p", | |
"b ec", | |
"k s</w>", | |
"con t", | |
"l and</w>", | |
"be en</w>", | |
"en ce</w>", | |
"k ing</w>", | |
"e l</w>", | |
"ag e</w>", | |
"lo w", | |
"m in", | |
". @</w>", | |
"@ .@</w>", | |
"om e</w>", | |
"m ent</w>", | |
"ch ar", | |
"g e", | |
"at er</w>", | |
"n or", | |
"h o", | |
"ou s</w>", | |
"wh o</w>", | |
"ea r</w>", | |
"sp ec", | |
"c ol", | |
"el y</w>", | |
"t y</w>", | |
"j o", | |
"ur ing</w>", | |
"du c", | |
"b ri", | |
"st r", | |
"c an", | |
"or i", | |
"t ra", | |
"p a", | |
"sh e</w>", | |
"d o", | |
"ti ve</w>", | |
"m on", | |
"ne w</w>", | |
"r it", | |
"tim e</w>", | |
"on s</w>", | |
"s o", | |
"m an</w>", | |
"d ec", | |
"c ent", | |
"l an", | |
"p i", | |
"ou r</w>", | |
"in ter", | |
"f er", | |
"g ra", | |
"g re", | |
"re s</w>", | |
"inc lu", | |
"m il", | |
"d uring</w>", | |
"ow n</w>", | |
"pre s", | |
"j u", | |
"n ed</w>", | |
"el l</w>", | |
", @</w>", | |
"@ ,@</w>", | |
"it e</w>", | |
"g en", | |
"wh en</w>", | |
"si g", | |
"b i", | |
"re n", | |
"f a", | |
"g a", | |
"pla y", | |
"en g", | |
"tion al</w>", | |
"oun d</w>", | |
"th ou", | |
"m ore</w>", | |
"re e</w>", | |
"em ber</w>", | |
"e i", | |
"s ou", | |
"s ur", | |
"s ti", | |
"c ar", | |
"for m", | |
"l ar", | |
"s es</w>", | |
"t en", | |
"in to</w>", | |
"t u", | |
"c es</w>", | |
"mo st</w>", | |
"k ed</w>", | |
"wa y</w>", | |
"c re", | |
"c oun", | |
"u p</w>", | |
"l es</w>", | |
"ac e</w>", | |
"al s</w>", | |
"k e", | |
"w ould</w>", | |
"an t</w>", | |
"b er", | |
"f u", | |
"it ed</w>", | |
"p ri", | |
"whi le</w>", | |
"o ver</w>", | |
"ing s</w>", | |
"r e</w>", | |
"fi l", | |
"s y", | |
"e st", | |
"ab le</w>", | |
"w n</w>", | |
"s ea", | |
"ac h", | |
"s ing</w>", | |
"in s</w>", | |
"ti c</w>", | |
"i d</w>", | |
"on ly</w>", | |
"at es</w>", | |
"t ri", | |
"v ing</w>", | |
"b a", | |
"v el", | |
"an ce</w>", | |
"st a", | |
"er n</w>", | |
"f ol", | |
"e en</w>", | |
"in ed</w>", | |
"st ru", | |
"un i", | |
"g ame</w>", | |
"la r</w>", | |
"s el", | |
"b li", | |
"u sed</w>", | |
"n ing</w>", | |
"p s</w>", | |
"ti es</w>", | |
"k no", | |
"c or", | |
"f t</w>", | |
"rec or", | |
"b le</w>", | |
"vi e", | |
"y s</w>", | |
"w il", | |
"ic al</w>", | |
"ap p", | |
"t ro", | |
"th ree</w>", | |
"c la", | |
"ol d</w>", | |
"sh ed</w>", | |
"h ea", | |
"ab out</w>", | |
"w rit", | |
"th an</w>", | |
"st e", | |
"l ater</w>", | |
"ar i", | |
"d y</w>", | |
"pu bli", | |
"lo c", | |
"ag a", | |
"th rou", | |
"s si", | |
"en d</w>", | |
"ma y</w>", | |
"an g", | |
"ac h</w>", | |
"v es</w>", | |
"o g", | |
"hi m</w>", | |
"be tw", | |
"thou gh</w>", | |
"betw een</w>", | |
"u m</w>", | |
"st ar", | |
"sc ri", | |
"re a", | |
"on d</w>", | |
"shi p</w>", | |
"o k</w>", | |
"h el", | |
"s ong</w>", | |
"c hi", | |
"ca p", | |
"e ver</w>", | |
"da y</w>", | |
"c ri", | |
"s ome</w>", | |
"b ro", | |
"n o</w>", | |
"th ere</w>", | |
"an s</w>", | |
"al l", | |
"n um", | |
"r ed</w>", | |
"ear s</w>", | |
"st s</w>", | |
"an y</w>", | |
"w ar", | |
"p h", | |
"p p", | |
"g in", | |
"stru c", | |
"am er", | |
"pro duc", | |
"s ch", | |
"c es", | |
"ur e</w>", | |
"at ing</w>", | |
"em p", | |
"t or", | |
"sea son</w>", | |
"for e</w>", | |
"i c</w>", | |
"c ity</w>", | |
"g ro", | |
"fol low", | |
"su b", | |
"b el", | |
"y ear</w>", | |
"c an</w>", | |
"s in", | |
"wh ere</w>", | |
"an d", | |
"ma de</w>", | |
"re lea", | |
"s m", | |
"b l", | |
"t en</w>", | |
"wi th", | |
"s on", | |
"man y</w>", | |
"a re", | |
"e d", | |
"h ow", | |
"amer ic", | |
"ur y</w>", | |
"st u", | |
"mu si", | |
"c u", | |
"n am", | |
"em ent</w>", | |
"su ch</w>", | |
"al bu", | |
"bu il", | |
"be fore</w>", | |
"e f", | |
"ar m", | |
"t on</w>", | |
"the m</w>", | |
"c al", | |
"b ar", | |
"d es</w>", | |
"m at", | |
"gen er", | |
"o d</w>", | |
"ser ies</w>", | |
"c er", | |
"sh o", | |
"en ti", | |
"h er", | |
"o ver", | |
"an n", | |
"w ell</w>", | |
"wor ld</w>", | |
"g an</w>", | |
"e st</w>", | |
"sec ond</w>", | |
"t ers</w>", | |
"si de</w>", | |
"tr an", | |
"l ine</w>", | |
"tur e</w>", | |
"por t</w>", | |
"be ing</w>", | |
"y ears</w>", | |
"bo th</w>", | |
"in di", | |
"the se</w>", | |
"na tional</w>", | |
"hi stor", | |
"f e</w>", | |
"v o", | |
"st ed</w>", | |
"an i", | |
"b as", | |
"po in", | |
"s ing", | |
"fil m</w>", | |
"p en", | |
"su p", | |
"m is", | |
"c ro", | |
"st ri", | |
"l in", | |
"t re", | |
"wa r</w>", | |
"how ever</w>", | |
"y ing</w>", | |
"l ing</w>", | |
"y p", | |
"ec ted</w>", | |
"di rec", | |
"vi sion</w>", | |
"albu m</w>", | |
"th en</w>", | |
"l l</w>", | |
"se ver", | |
"throu gh</w>", | |
"kno wn</w>", | |
"b or", | |
"c ul", | |
"c lu", | |
"st er</w>", | |
"sou th</w>", | |
"r y</w>", | |
"ec t</w>", | |
"lo w</w>", | |
"p r", | |
"s k", | |
"is o", | |
"nor th</w>", | |
"par t</w>", | |
"f ac", | |
"t ly</w>", | |
"per i", | |
"e u", | |
"b att", | |
"st ate</w>", | |
"c ed</w>", | |
"con si", | |
"in f", | |
"po li", | |
"ol og", | |
"ear ly</w>", | |
"po si", | |
"am es</w>", | |
"w in", | |
"de vel", | |
"o b", | |
"v e", | |
"v en</w>", | |
"op er", | |
"g er", | |
"of fi", | |
"char ac", | |
"m s</w>", | |
"hi gh", | |
"a d</w>", | |
"th o", | |
"sever al</w>", | |
"d re", | |
"de scri", | |
"al e</w>", | |
"num ber</w>", | |
"a ir", | |
"inclu ding</w>", | |
"in st</w>", | |
"aga inst</w>", | |
"l s</w>", | |
"su l", | |
"ep iso", | |
"c am", | |
"di f", | |
"so ci", | |
"bec ame</w>", | |
"li ke</w>", | |
"t el", | |
"f our</w>", | |
"âĢ Ķ</w>", | |
"h ou", | |
"jo h", | |
"un ited</w>", | |
"in v", | |
"un der</w>", | |
"no v", | |
"ti v", | |
"su c", | |
"a tions</w>", | |
"ac k", | |
"t or</w>", | |
"r on", | |
"un d</w>", | |
"w s</w>", | |
"f o", | |
"g r", | |
"devel op", | |
"al though</w>", | |
"cont in", | |
"we st</w>", | |
"ori gin", | |
"musi c</w>", | |
"or s</w>", | |
"d on</w>", | |
"cent ury</w>", | |
"w ard</w>", | |
"wor k</w>", | |
"m e</w>", | |
"am i", | |
"ch a", | |
"ver y</w>", | |
"h ar", | |
"di s", | |
"z ed</w>", | |
"d o</w>", | |
"g s</w>", | |
"t ow", | |
"s ol", | |
"follow ing</w>", | |
"li on</w>", | |
"re ma", | |
"n s</w>", | |
"ti sh</w>", | |
"ch ur", | |
"s om", | |
"m p", | |
"t le</w>", | |
"go ver", | |
"d el", | |
"comp le", | |
"c ur", | |
"u se</w>", | |
"b ack</w>", | |
"h u", | |
"st ern</w>", | |
"be gan</w>", | |
"fi el", | |
"au se</w>", | |
"d ra", | |
"p as", | |
"b il", | |
"ca tion</w>", | |
"d ent</w>", | |
"b ed</w>", | |
"bec ause</w>", | |
"an t", | |
"ea m</w>", | |
"p hi", | |
"y o", | |
"contin u", | |
"ta in</w>", | |
"tr y</w>", | |
"f re", | |
"pe op", | |
"cal led</w>", | |
"f ound</w>", | |
"episo de</w>", | |
"de sig", | |
"m or", | |
"se t</w>", | |
"le y</w>", | |
"ea st</w>", | |
"tr ac", | |
"c ra" | |
] | |
} | |
} |