tokenizer_g2pen / tokenizer.json
therealvul's picture
Upload 3 files
9c575ce verified
raw
history blame
14 kB
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[PAD]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[CLS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "[SEP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "[MASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Sequence",
"pretokenizers": [
{
"type": "Whitespace"
},
{
"type": "Punctuation",
"behavior": "Isolated"
}
]
},
"post_processor": null,
"decoder": null,
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"[UNK]": 0,
"[PAD]": 1,
"[CLS]": 2,
"[SEP]": 3,
"[MASK]": 4,
"!": 5,
"'": 6,
",": 7,
"-": 8,
".": 9,
"0": 10,
"1": 11,
"2": 12,
"?": 13,
"A": 14,
"B": 15,
"C": 16,
"D": 17,
"E": 18,
"F": 19,
"G": 20,
"H": 21,
"I": 22,
"J": 23,
"K": 24,
"L": 25,
"M": 26,
"N": 27,
"O": 28,
"P": 29,
"R": 30,
"S": 31,
"T": 32,
"U": 33,
"V": 34,
"W": 35,
"Y": 36,
"Z": 37,
"H0": 38,
"AH0": 39,
"H1": 40,
"Y1": 41,
"W1": 42,
"IH0": 43,
"AE": 44,
"IH1": 45,
"AH0N": 46,
"ER": 47,
"EH1": 48,
"AE1": 49,
"AA": 50,
"AH1": 51,
"AA1": 52,
"ER0": 53,
"Y0": 54,
"IY0": 55,
"UW1": 56,
"DH": 57,
"AO": 58,
"AY1": 59,
"IY1": 60,
"EY1": 61,
"AH0L": 62,
"AO1": 63,
"IH0N": 64,
"OW1": 65,
"ST": 66,
"HH": 67,
"SH": 68,
"AA1R": 69,
"DHAH0": 70,
"AO1R": 71,
"AE1N": 72,
"IH1Z": 73,
"JH": 74,
"AH1V": 75,
"TS": 76,
"H2": 77,
"Y2": 78,
"AH0ND": 79,
"TUW1": 80,
"IH0NG": 81,
"ER1": 82,
"EH1N": 83,
"SHAH0N": 84,
"CH": 85,
"AH0T": 86,
"PR": 87,
"IH0K": 88,
"AH0M": 89,
"EH1R": 90,
"YUW1": 91,
"AH1M": 92,
"TH": 93,
"HHAE1": 94,
"AH0S": 95,
"AW1": 96,
"AH0NT": 97,
"W0": 98,
"IH1N": 99,
"ER0Z": 100,
"AH0D": 101,
"AH0Z": 102,
"HHAE1V": 103,
"EH1L": 104,
"RIY0": 105,
"AE1T": 106,
"KT": 107,
"EH2": 108,
"KS": 109,
"AA1N": 110,
"FAO1R": 111,
"AH1N": 112,
"LIY0": 113,
"EY1SHAH0N": 114,
"MOW1": 115,
"UH1": 116,
"EY2": 117,
"DZ": 118,
"AY2": 119,
"SAH1M": 120,
"IH2": 121,
"AO1L": 122,
"DIH0": 123,
"WIH1": 124,
"PL": 125,
"AH0K": 126,
"W2": 127,
"OW0": 128,
"TER0": 129,
"GR": 130,
"LAY1": 131,
"TR": 132,
"SP": 133,
"AH0B": 134,
"AH0LZ": 135,
"MOW1ST": 136,
"EY1N": 137,
"KAE1N": 138,
"FR": 139,
"IY0Z": 140,
"MAH0N": 141,
"IH1L": 142,
"DHAE1T": 143,
"AE2": 144,
"AH0NS": 145,
"ND": 146,
"IH1T": 147,
"AA1RT": 148,
"KAA1": 149,
"KR": 150,
"OW1N": 151,
"AA2": 152,
"IH0V": 153,
"AH0P": 154,
"OW2": 155,
"UW0": 156,
"AA1L": 157,
"AH0TIY0": 158,
"IH0Z": 159,
"STR": 160,
"BIY1": 161,
"SK": 162,
"IH1NG": 163,
"EY2T": 164,
"AH0JH": 165,
"PAA1RT": 166,
"NZ": 167,
"EH1K": 168,
"DHEH1R": 169,
"BR": 170,
"IY1Z": 171,
"IY1T": 172,
"AE1Z": 173,
"AH0NZ": 174,
"VER0": 175,
"WIH1DH": 176,
"AH1T": 177,
"BAY1": 178,
"EH1T": 179,
"DR": 180,
"PAH0L": 181,
"OW1L": 182,
"YUW1Z": 183,
"WAY1": 184,
"AH0F": 185,
"IY2": 186,
"AE0": 187,
"EH1S": 188,
"EY1T": 189,
"EH0": 190,
"EH1M": 191,
"AO2": 192,
"FL": 193,
"MEH1N": 194,
"AE1K": 195,
"BL": 196,
"EH1D": 197,
"PER0": 198,
"IH1R": 199,
"EH1KT": 200,
"OY1": 201,
"MZ": 202,
"IH1F": 203,
"EH1ST": 204,
"MEY1": 205,
"WIY1": 206,
"DHER0": 207,
"CHER0": 208,
"LZ": 209,
"AY0": 210,
"KL": 211,
"DIY0": 212,
"GZ": 213,
"IH1ST": 214,
"VAY1": 215,
"EH1RIY0": 216,
"UH1R": 217,
"DER0": 218,
"AE1L": 219,
"AW1T": 220,
"FRAH1M": 221,
"AH1L": 222,
"MP": 223,
"NOW1": 224,
"AH0ST": 225,
"TAY1": 226,
"PIY1": 227,
"AH0LIY0": 228,
"AE1S": 229,
"WER1": 230,
"IH1S": 231,
"PS": 232,
"IH0T": 233,
"IH2N": 234,
"PRAA1": 235,
"IH0L": 236,
"AA1T": 237,
"SAY1": 238,
"KAH0M": 239,
"MIY1": 240,
"WAH1N": 241,
"WAO1": 242,
"AH0NTS": 243,
"PLAE1N": 244,
"SIY1": 245,
"SHAH0NZ": 246,
"UH1D": 247,
"PIY1PAH0L": 248,
"MAY1": 249,
"VZ": 250,
"MEH1NIY0": 251,
"AE1M": 252,
"MAH0NT": 253,
"UW2": 254,
"AH0W": 255,
"HHEH1L": 256,
"YAH0L": 257,
"IH1RIY0": 258,
"WAO1TER0": 259,
"LOW1": 260,
"KAH0NT": 261,
"AH1ST": 262,
"FIY1": 263,
"AH0MZ": 264,
"MAO1R": 265,
"EY1S": 266,
"AE1KT": 267,
"AH2": 268,
"DUW1": 269,
"AH0BAH0L": 270,
"KAA1Z": 271,
"SEH1L": 272,
"AO1RT": 273,
"YUW0": 274,
"LIY1": 275,
"THR": 276,
"EH1ND": 277,
"UW1D": 278,
"BAH1T": 279,
"SER1": 280,
"NT": 281,
"SOW1": 282,
"FAY1": 283,
"AO1LS": 284,
"AH0G": 285,
"NG": 286,
"DHEY1": 287,
"AH1DHER0": 288,
"AO1LSOW0": 289,
"HHAY1": 290,
"LIH1": 291,
"AH1NG": 292,
"EH1NT": 293,
"WEY1": 294,
"AH0V": 295,
"PRAH0D": 296,
"AW1ER0": 297,
"IH0KAH0L": 298,
"SHAH0L": 299,
"AE1NAH0M": 300,
"DHAE1N": 301,
"HHAE1Z": 302,
"EH1V": 303,
"NAH0S": 304,
"AY1T": 305,
"UW1S": 306,
"RIH1": 307,
"KAE1": 308,
"LD": 309,
"VEH1RIY0": 310,
"RIY1": 311,
"OW1Z": 312,
"SER0": 313,
"IH0KS": 314,
"LAY1K": 315,
"AY2Z": 316,
"WEH1N": 317,
"PT": 318,
"KAH0N": 319,
"NUW1": 320,
"AW1ND": 321,
"RIH0": 322,
"FER0": 323,
"AO1NG": 324,
"AE1ND": 325,
"EY1NJH": 326,
"AH0TS": 327,
"EY1SHAH0NZ": 328,
"WAH1T": 329,
"SEH2": 330,
"DIH0Z": 331,
"PLAE1NTS": 332,
"ER1N": 333,
"THIH1NG": 334,
"0R": 335,
"SIH1": 336,
"GROW1": 337,
"EH1NER0": 338,
"AO2R": 339,
"KW": 340,
"WIH1CH": 341,
"AA1KS": 342,
"SAH1": 343,
"IH2Z": 344,
"AO1F": 345,
"EY2TAH0D": 346,
"MAH1": 347,
"WEH1L": 348,
"AE1NAH0MAH0LZ": 349,
"IY1V": 350,
"EH1VER0": 351,
"HHYUW1": 352,
"TEY1": 353,
"IH0MP": 354,
"SEY1": 355,
"JHAH1ST": 356,
"AY1M": 357,
"AH1P": 358,
"KAH1M": 359,
"EH2RIY0": 360,
"IH1SHAH0N": 361,
"IY1N": 362,
"OW1T": 363,
"BAA1": 364,
"DIH1F": 365,
"IH1K": 366,
"BIH0K": 367,
"BIH0": 368,
"AW1N": 369,
"SM": 370,
"EY1Z": 371,
"MAH0T": 372,
"IH0D": 373,
"KIH0NG": 374,
"WAA1": 375,
"TAY1M": 376,
"HHYUW1MAH0N": 377,
"BER1": 378,
"HHIY1": 379,
"DHIH1S": 380,
"AE1NG": 381,
"IH1V": 382,
"FUW1D": 383
},
"merges": [
"H 0",
"A H0",
"H 1",
"Y 1",
"W 1",
"I H0",
"A E",
"I H1",
"AH0 N",
"E R",
"E H1",
"AE 1",
"A A",
"A H1",
"AA 1",
"ER 0",
"Y 0",
"I Y0",
"U W1",
"D H",
"A O",
"A Y1",
"I Y1",
"E Y1",
"AH0 L",
"AO 1",
"IH0 N",
"O W1",
"S T",
"H H",
"S H",
"AA1 R",
"DH AH0",
"AO1 R",
"AE1 N",
"IH1 Z",
"J H",
"AH1 V",
"T S",
"H 2",
"Y 2",
"AH0N D",
"T UW1",
"IH0N G",
"ER 1",
"EH1 N",
"SH AH0N",
"C H",
"AH0 T",
"P R",
"IH0 K",
"AH0 M",
"EH1 R",
"Y UW1",
"AH1 M",
"T H",
"HH AE1",
"AH0 S",
"A W1",
"AH0N T",
"W 0",
"IH1 N",
"ER0 Z",
"AH0 D",
"AH0 Z",
"HHAE1 V",
"EH1 L",
"R IY0",
"AE1 T",
"K T",
"E H2",
"K S",
"AA1 N",
"F AO1R",
"AH1 N",
"L IY0",
"EY1 SHAH0N",
"M OW1",
"U H1",
"E Y2",
"D Z",
"A Y2",
"S AH1M",
"I H2",
"AO1 L",
"D IH0",
"W IH1",
"P L",
"AH0 K",
"W 2",
"O W0",
"T ER0",
"G R",
"L AY1",
"T R",
"S P",
"AH0 B",
"AH0L Z",
"MOW1 ST",
"EY1 N",
"K AE1N",
"F R",
"IY0 Z",
"M AH0N",
"IH1 L",
"DH AE1T",
"AE 2",
"AH0N S",
"N D",
"IH1 T",
"AA1R T",
"K AA1",
"K R",
"OW1 N",
"AA 2",
"IH0 V",
"AH0 P",
"O W2",
"U W0",
"AA1 L",
"AH0T IY0",
"IH0 Z",
"ST R",
"B IY1",
"S K",
"IH1N G",
"EY2 T",
"AH0 JH",
"P AA1RT",
"N Z",
"EH1 K",
"DH EH1R",
"B R",
"IY1 Z",
"IY1 T",
"AE1 Z",
"AH0N Z",
"V ER0",
"WIH1 DH",
"AH1 T",
"B AY1",
"EH1 T",
"D R",
"P AH0L",
"OW1 L",
"YUW1 Z",
"W AY1",
"AH0 F",
"I Y2",
"AE 0",
"EH1 S",
"EY1 T",
"E H0",
"EH1 M",
"AO 2",
"F L",
"M EH1N",
"AE1 K",
"B L",
"EH1 D",
"P ER0",
"IH1 R",
"EH1 KT",
"O Y1",
"M Z",
"IH1 F",
"EH1 ST",
"M EY1",
"W IY1",
"DH ER0",
"CH ER0",
"L Z",
"A Y0",
"K L",
"D IY0",
"G Z",
"IH1 ST",
"V AY1",
"EH1R IY0",
"UH1 R",
"D ER0",
"AE1 L",
"AW1 T",
"FR AH1M",
"AH1 L",
"M P",
"N OW1",
"AH0 ST",
"T AY1",
"P IY1",
"AH0L IY0",
"AE1 S",
"W ER1",
"IH1 S",
"P S",
"IH0 T",
"IH2 N",
"PR AA1",
"IH0 L",
"AA1 T",
"S AY1",
"K AH0M",
"M IY1",
"W AH1N",
"W AO1",
"AH0N TS",
"PL AE1N",
"S IY1",
"SHAH0N Z",
"UH1 D",
"PIY1 PAH0L",
"M AY1",
"V Z",
"MEH1N IY0",
"AE1 M",
"M AH0NT",
"U W2",
"AH0 W",
"HH EH1L",
"Y AH0L",
"IH1 RIY0",
"WAO1 TER0",
"L OW1",
"K AH0NT",
"AH1 ST",
"F IY1",
"AH0M Z",
"M AO1R",
"EY1 S",
"AE1 KT",
"A H2",
"D UW1",
"AH0B AH0L",
"KAA1 Z",
"S EH1L",
"AO1R T",
"Y UW0",
"L IY1",
"TH R",
"EH1N D",
"UW1 D",
"B AH1T",
"S ER1",
"N T",
"S OW1",
"F AY1",
"AO1L S",
"AH0 G",
"N G",
"DH EY1",
"AH1 DHER0",
"AO1LS OW0",
"HH AY1",
"L IH1",
"AH1N G",
"EH1N T",
"W EY1",
"AH0 V",
"PR AH0D",
"AW1 ER0",
"IH0K AH0L",
"SH AH0L",
"AE1N AH0M",
"DH AE1N",
"HHAE1 Z",
"EH1 V",
"N AH0S",
"AY1 T",
"UW1 S",
"R IH1",
"K AE1",
"L D",
"V EH1RIY0",
"R IY1",
"OW1 Z",
"S ER0",
"IH0K S",
"LAY1 K",
"AY2 Z",
"W EH1N",
"P T",
"K AH0N",
"N UW1",
"AW1 ND",
"R IH0",
"F ER0",
"AO1 NG",
"AE1N D",
"EY1N JH",
"AH0 TS",
"EY1SHAH0N Z",
"W AH1T",
"S EH2",
"DIH0 Z",
"PLAE1N TS",
"ER1 N",
"TH IH1NG",
"0 R",
"S IH1",
"GR OW1",
"EH1N ER0",
"AO2 R",
"K W",
"WIH1 CH",
"AA1 KS",
"S AH1",
"IH2 Z",
"AO1 F",
"EY2T AH0D",
"M AH1",
"W EH1L",
"AE1NAH0M AH0LZ",
"IY1 V",
"EH1 VER0",
"HH YUW1",
"T EY1",
"IH0 MP",
"S EY1",
"JH AH1ST",
"AY1 M",
"AH1 P",
"K AH1M",
"EH2 RIY0",
"IH1 SHAH0N",
"IY1 N",
"OW1 T",
"B AA1",
"D IH1F",
"IH1 K",
"B IH0K",
"B IH0",
"AW1 N",
"S M",
"EY1 Z",
"M AH0T",
"IH0 D",
"K IH0NG",
"W AA1",
"TAY1 M",
"HHYUW1 MAH0N",
"B ER1",
"HH IY1",
"DH IH1S",
"AE1N G",
"IH1 V",
"F UW1D"
]
}
}