|
{
|
|
"added_tokens_decoder": {
|
|
"0": {
|
|
"content": "<s>",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"1": {
|
|
"content": "<pad>",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"2": {
|
|
"content": "</s>",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256001": {
|
|
"content": "ace_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256002": {
|
|
"content": "ace_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256003": {
|
|
"content": "acm_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256004": {
|
|
"content": "acq_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256005": {
|
|
"content": "aeb_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256006": {
|
|
"content": "afr_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256007": {
|
|
"content": "ajp_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256008": {
|
|
"content": "aka_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256009": {
|
|
"content": "amh_Ethi",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256010": {
|
|
"content": "apc_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256011": {
|
|
"content": "arb_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256012": {
|
|
"content": "ars_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256013": {
|
|
"content": "ary_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256014": {
|
|
"content": "arz_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256015": {
|
|
"content": "asm_Beng",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256016": {
|
|
"content": "ast_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256017": {
|
|
"content": "awa_Deva",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256018": {
|
|
"content": "ayr_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256019": {
|
|
"content": "azb_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256020": {
|
|
"content": "azj_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256021": {
|
|
"content": "bak_Cyrl",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256022": {
|
|
"content": "bam_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256023": {
|
|
"content": "ban_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256024": {
|
|
"content": "bel_Cyrl",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256025": {
|
|
"content": "bem_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256026": {
|
|
"content": "ben_Beng",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256027": {
|
|
"content": "bho_Deva",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256028": {
|
|
"content": "bjn_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256029": {
|
|
"content": "bjn_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256030": {
|
|
"content": "bod_Tibt",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256031": {
|
|
"content": "bos_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256032": {
|
|
"content": "bug_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256033": {
|
|
"content": "bul_Cyrl",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256034": {
|
|
"content": "cat_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256035": {
|
|
"content": "ceb_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256036": {
|
|
"content": "ces_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256037": {
|
|
"content": "cjk_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256038": {
|
|
"content": "ckb_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256039": {
|
|
"content": "crh_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256040": {
|
|
"content": "cym_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256041": {
|
|
"content": "dan_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256042": {
|
|
"content": "deu_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256043": {
|
|
"content": "dik_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256044": {
|
|
"content": "dyu_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256045": {
|
|
"content": "dzo_Tibt",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256046": {
|
|
"content": "ell_Grek",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256047": {
|
|
"content": "eng_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256048": {
|
|
"content": "epo_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256049": {
|
|
"content": "est_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256050": {
|
|
"content": "eus_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256051": {
|
|
"content": "ewe_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256052": {
|
|
"content": "fao_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256053": {
|
|
"content": "pes_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256054": {
|
|
"content": "fij_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256055": {
|
|
"content": "fin_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256056": {
|
|
"content": "fon_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256057": {
|
|
"content": "fra_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256058": {
|
|
"content": "fur_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256059": {
|
|
"content": "fuv_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256060": {
|
|
"content": "gla_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256061": {
|
|
"content": "gle_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256062": {
|
|
"content": "glg_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256063": {
|
|
"content": "grn_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256064": {
|
|
"content": "guj_Gujr",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256065": {
|
|
"content": "hat_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256066": {
|
|
"content": "hau_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256067": {
|
|
"content": "heb_Hebr",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256068": {
|
|
"content": "hin_Deva",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256069": {
|
|
"content": "hne_Deva",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256070": {
|
|
"content": "hrv_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256071": {
|
|
"content": "hun_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256072": {
|
|
"content": "hye_Armn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256073": {
|
|
"content": "ibo_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256074": {
|
|
"content": "ilo_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256075": {
|
|
"content": "ind_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256076": {
|
|
"content": "isl_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256077": {
|
|
"content": "ita_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256078": {
|
|
"content": "jav_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256079": {
|
|
"content": "jpn_Jpan",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256080": {
|
|
"content": "kab_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256081": {
|
|
"content": "kac_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256082": {
|
|
"content": "kam_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256083": {
|
|
"content": "kan_Knda",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256084": {
|
|
"content": "kas_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256085": {
|
|
"content": "kas_Deva",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256086": {
|
|
"content": "kat_Geor",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256087": {
|
|
"content": "knc_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256088": {
|
|
"content": "knc_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256089": {
|
|
"content": "kaz_Cyrl",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256090": {
|
|
"content": "kbp_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256091": {
|
|
"content": "kea_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256092": {
|
|
"content": "khm_Khmr",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256093": {
|
|
"content": "kik_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256094": {
|
|
"content": "kin_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256095": {
|
|
"content": "kir_Cyrl",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256096": {
|
|
"content": "kmb_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256097": {
|
|
"content": "kon_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256098": {
|
|
"content": "kor_Hang",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256099": {
|
|
"content": "kmr_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256100": {
|
|
"content": "lao_Laoo",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256101": {
|
|
"content": "lvs_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256102": {
|
|
"content": "lij_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256103": {
|
|
"content": "lim_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256104": {
|
|
"content": "lin_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256105": {
|
|
"content": "lit_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256106": {
|
|
"content": "lmo_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256107": {
|
|
"content": "ltg_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256108": {
|
|
"content": "ltz_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256109": {
|
|
"content": "lua_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256110": {
|
|
"content": "lug_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256111": {
|
|
"content": "luo_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256112": {
|
|
"content": "lus_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256113": {
|
|
"content": "mag_Deva",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256114": {
|
|
"content": "mai_Deva",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256115": {
|
|
"content": "mal_Mlym",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256116": {
|
|
"content": "mar_Deva",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256117": {
|
|
"content": "min_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256118": {
|
|
"content": "mkd_Cyrl",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256119": {
|
|
"content": "plt_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256120": {
|
|
"content": "mlt_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256121": {
|
|
"content": "mni_Beng",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256122": {
|
|
"content": "khk_Cyrl",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256123": {
|
|
"content": "mos_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256124": {
|
|
"content": "mri_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256125": {
|
|
"content": "zsm_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256126": {
|
|
"content": "mya_Mymr",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256127": {
|
|
"content": "nld_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256128": {
|
|
"content": "nno_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256129": {
|
|
"content": "nob_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256130": {
|
|
"content": "npi_Deva",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256131": {
|
|
"content": "nso_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256132": {
|
|
"content": "nus_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256133": {
|
|
"content": "nya_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256134": {
|
|
"content": "oci_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256135": {
|
|
"content": "gaz_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256136": {
|
|
"content": "ory_Orya",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256137": {
|
|
"content": "pag_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256138": {
|
|
"content": "pan_Guru",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256139": {
|
|
"content": "pap_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256140": {
|
|
"content": "pol_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256141": {
|
|
"content": "por_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256142": {
|
|
"content": "prs_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256143": {
|
|
"content": "pbt_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256144": {
|
|
"content": "quy_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256145": {
|
|
"content": "ron_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256146": {
|
|
"content": "run_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256147": {
|
|
"content": "rus_Cyrl",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256148": {
|
|
"content": "sag_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256149": {
|
|
"content": "san_Deva",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256150": {
|
|
"content": "sat_Beng",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256151": {
|
|
"content": "scn_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256152": {
|
|
"content": "shn_Mymr",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256153": {
|
|
"content": "sin_Sinh",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256154": {
|
|
"content": "slk_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256155": {
|
|
"content": "slv_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256156": {
|
|
"content": "smo_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256157": {
|
|
"content": "sna_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256158": {
|
|
"content": "snd_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256159": {
|
|
"content": "som_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256160": {
|
|
"content": "sot_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256161": {
|
|
"content": "spa_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256162": {
|
|
"content": "als_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256163": {
|
|
"content": "srd_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256164": {
|
|
"content": "srp_Cyrl",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256165": {
|
|
"content": "ssw_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256166": {
|
|
"content": "sun_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256167": {
|
|
"content": "swe_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256168": {
|
|
"content": "swh_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256169": {
|
|
"content": "szl_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256170": {
|
|
"content": "tam_Taml",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256171": {
|
|
"content": "tat_Cyrl",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256172": {
|
|
"content": "tel_Telu",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256173": {
|
|
"content": "tgk_Cyrl",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256174": {
|
|
"content": "tgl_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256175": {
|
|
"content": "tha_Thai",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256176": {
|
|
"content": "tir_Ethi",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256177": {
|
|
"content": "taq_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256178": {
|
|
"content": "taq_Tfng",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256179": {
|
|
"content": "tpi_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256180": {
|
|
"content": "tsn_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256181": {
|
|
"content": "tso_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256182": {
|
|
"content": "tuk_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256183": {
|
|
"content": "tum_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256184": {
|
|
"content": "tur_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256185": {
|
|
"content": "twi_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256186": {
|
|
"content": "tzm_Tfng",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256187": {
|
|
"content": "uig_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256188": {
|
|
"content": "ukr_Cyrl",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256189": {
|
|
"content": "umb_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256190": {
|
|
"content": "urd_Arab",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256191": {
|
|
"content": "uzn_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256192": {
|
|
"content": "vec_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256193": {
|
|
"content": "vie_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256194": {
|
|
"content": "war_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256195": {
|
|
"content": "wol_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256196": {
|
|
"content": "xho_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256197": {
|
|
"content": "ydd_Hebr",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256198": {
|
|
"content": "yor_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256199": {
|
|
"content": "yue_Hant",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256200": {
|
|
"content": "zho_Hans",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256201": {
|
|
"content": "zho_Hant",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256202": {
|
|
"content": "zul_Latn",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"256203": {
|
|
"content": "<mask>",
|
|
"lstrip": true,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
},
|
|
"3": {
|
|
"content": "<unk>",
|
|
"lstrip": false,
|
|
"normalized": false,
|
|
"rstrip": false,
|
|
"single_word": false,
|
|
"special": true
|
|
}
|
|
},
|
|
"additional_special_tokens": [
|
|
"ace_Arab",
|
|
"ace_Latn",
|
|
"acm_Arab",
|
|
"acq_Arab",
|
|
"aeb_Arab",
|
|
"afr_Latn",
|
|
"ajp_Arab",
|
|
"aka_Latn",
|
|
"amh_Ethi",
|
|
"apc_Arab",
|
|
"arb_Arab",
|
|
"ars_Arab",
|
|
"ary_Arab",
|
|
"arz_Arab",
|
|
"asm_Beng",
|
|
"ast_Latn",
|
|
"awa_Deva",
|
|
"ayr_Latn",
|
|
"azb_Arab",
|
|
"azj_Latn",
|
|
"bak_Cyrl",
|
|
"bam_Latn",
|
|
"ban_Latn",
|
|
"bel_Cyrl",
|
|
"bem_Latn",
|
|
"ben_Beng",
|
|
"bho_Deva",
|
|
"bjn_Arab",
|
|
"bjn_Latn",
|
|
"bod_Tibt",
|
|
"bos_Latn",
|
|
"bug_Latn",
|
|
"bul_Cyrl",
|
|
"cat_Latn",
|
|
"ceb_Latn",
|
|
"ces_Latn",
|
|
"cjk_Latn",
|
|
"ckb_Arab",
|
|
"crh_Latn",
|
|
"cym_Latn",
|
|
"dan_Latn",
|
|
"deu_Latn",
|
|
"dik_Latn",
|
|
"dyu_Latn",
|
|
"dzo_Tibt",
|
|
"ell_Grek",
|
|
"eng_Latn",
|
|
"epo_Latn",
|
|
"est_Latn",
|
|
"eus_Latn",
|
|
"ewe_Latn",
|
|
"fao_Latn",
|
|
"pes_Arab",
|
|
"fij_Latn",
|
|
"fin_Latn",
|
|
"fon_Latn",
|
|
"fra_Latn",
|
|
"fur_Latn",
|
|
"fuv_Latn",
|
|
"gla_Latn",
|
|
"gle_Latn",
|
|
"glg_Latn",
|
|
"grn_Latn",
|
|
"guj_Gujr",
|
|
"hat_Latn",
|
|
"hau_Latn",
|
|
"heb_Hebr",
|
|
"hin_Deva",
|
|
"hne_Deva",
|
|
"hrv_Latn",
|
|
"hun_Latn",
|
|
"hye_Armn",
|
|
"ibo_Latn",
|
|
"ilo_Latn",
|
|
"ind_Latn",
|
|
"isl_Latn",
|
|
"ita_Latn",
|
|
"jav_Latn",
|
|
"jpn_Jpan",
|
|
"kab_Latn",
|
|
"kac_Latn",
|
|
"kam_Latn",
|
|
"kan_Knda",
|
|
"kas_Arab",
|
|
"kas_Deva",
|
|
"kat_Geor",
|
|
"knc_Arab",
|
|
"knc_Latn",
|
|
"kaz_Cyrl",
|
|
"kbp_Latn",
|
|
"kea_Latn",
|
|
"khm_Khmr",
|
|
"kik_Latn",
|
|
"kin_Latn",
|
|
"kir_Cyrl",
|
|
"kmb_Latn",
|
|
"kon_Latn",
|
|
"kor_Hang",
|
|
"kmr_Latn",
|
|
"lao_Laoo",
|
|
"lvs_Latn",
|
|
"lij_Latn",
|
|
"lim_Latn",
|
|
"lin_Latn",
|
|
"lit_Latn",
|
|
"lmo_Latn",
|
|
"ltg_Latn",
|
|
"ltz_Latn",
|
|
"lua_Latn",
|
|
"lug_Latn",
|
|
"luo_Latn",
|
|
"lus_Latn",
|
|
"mag_Deva",
|
|
"mai_Deva",
|
|
"mal_Mlym",
|
|
"mar_Deva",
|
|
"min_Latn",
|
|
"mkd_Cyrl",
|
|
"plt_Latn",
|
|
"mlt_Latn",
|
|
"mni_Beng",
|
|
"khk_Cyrl",
|
|
"mos_Latn",
|
|
"mri_Latn",
|
|
"zsm_Latn",
|
|
"mya_Mymr",
|
|
"nld_Latn",
|
|
"nno_Latn",
|
|
"nob_Latn",
|
|
"npi_Deva",
|
|
"nso_Latn",
|
|
"nus_Latn",
|
|
"nya_Latn",
|
|
"oci_Latn",
|
|
"gaz_Latn",
|
|
"ory_Orya",
|
|
"pag_Latn",
|
|
"pan_Guru",
|
|
"pap_Latn",
|
|
"pol_Latn",
|
|
"por_Latn",
|
|
"prs_Arab",
|
|
"pbt_Arab",
|
|
"quy_Latn",
|
|
"ron_Latn",
|
|
"run_Latn",
|
|
"rus_Cyrl",
|
|
"sag_Latn",
|
|
"san_Deva",
|
|
"sat_Beng",
|
|
"scn_Latn",
|
|
"shn_Mymr",
|
|
"sin_Sinh",
|
|
"slk_Latn",
|
|
"slv_Latn",
|
|
"smo_Latn",
|
|
"sna_Latn",
|
|
"snd_Arab",
|
|
"som_Latn",
|
|
"sot_Latn",
|
|
"spa_Latn",
|
|
"als_Latn",
|
|
"srd_Latn",
|
|
"srp_Cyrl",
|
|
"ssw_Latn",
|
|
"sun_Latn",
|
|
"swe_Latn",
|
|
"swh_Latn",
|
|
"szl_Latn",
|
|
"tam_Taml",
|
|
"tat_Cyrl",
|
|
"tel_Telu",
|
|
"tgk_Cyrl",
|
|
"tgl_Latn",
|
|
"tha_Thai",
|
|
"tir_Ethi",
|
|
"taq_Latn",
|
|
"taq_Tfng",
|
|
"tpi_Latn",
|
|
"tsn_Latn",
|
|
"tso_Latn",
|
|
"tuk_Latn",
|
|
"tum_Latn",
|
|
"tur_Latn",
|
|
"twi_Latn",
|
|
"tzm_Tfng",
|
|
"uig_Arab",
|
|
"ukr_Cyrl",
|
|
"umb_Latn",
|
|
"urd_Arab",
|
|
"uzn_Latn",
|
|
"vec_Latn",
|
|
"vie_Latn",
|
|
"war_Latn",
|
|
"wol_Latn",
|
|
"xho_Latn",
|
|
"ydd_Hebr",
|
|
"yor_Latn",
|
|
"yue_Hant",
|
|
"zho_Hans",
|
|
"zho_Hant",
|
|
"zul_Latn"
|
|
],
|
|
"bos_token": "<s>",
|
|
"clean_up_tokenization_spaces": true,
|
|
"cls_token": "<s>",
|
|
"eos_token": "</s>",
|
|
"legacy_behaviour": false,
|
|
"mask_token": {
|
|
"__type": "AddedToken",
|
|
"content": "<mask>",
|
|
"lstrip": true,
|
|
"normalized": true,
|
|
"rstrip": false,
|
|
"single_word": false
|
|
},
|
|
"model_max_length": 1024,
|
|
"pad_token": "<pad>",
|
|
"sep_token": "</s>",
|
|
"sp_model_kwargs": {},
|
|
"src_lang": "eng_Latn",
|
|
"tgt_lang": null,
|
|
"tokenizer_class": "NllbTokenizer",
|
|
"unk_token": "<unk>"
|
|
}
|
|
|