goldfish-models committed on
Commit
5afad59
1 Parent(s): 339ffbe

Upload nzi_latn_5mb tokenizer.

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"[XXXXX21]": 20455, "[XXXXX39]": 20473, "[XXXXX45]": 20479, "[XXXXX31]": 20465, "[XXXXX22]": 20456, "<pad>": 20432, "[XXXXX0]": 20434, "[XXXXX13]": 20447, "[XXXXX18]": 20452, "[XXXXX38]": 20472, "[XXXXX41]": 20475, "[XXXXX24]": 20458, "[XXXXX17]": 20451, "[SEP]": 20431, "[XXXXX33]": 20467, "[XXXXX14]": 20448, "[XXXXX43]": 20477, "[XXXXX16]": 20450, "[XXXXX10]": 20444, "[XXXXX44]": 20478, "[XXXXX40]": 20474, "[XXXXX9]": 20443, "[MASK]": 20433, "[XXXXX6]": 20440, "[XXXXX42]": 20476, "[XXXXX30]": 20464, "[XXXXX8]": 20442, "[XXXXX3]": 20437, "[XXXXX28]": 20462, "[XXXXX7]": 20441, "[XXXXX5]": 20439, "[XXXXX2]": 20436, "[XXXXX12]": 20446, "[XXXXX26]": 20460, "[XXXXX29]": 20463, "[XXXXX11]": 20445, "[XXXXX27]": 20461, "[XXXXX35]": 20469, "[XXXXX34]": 20468, "[XXXXX19]": 20453, "[XXXXX15]": 20449, "[XXXXX23]": 20457, "[XXXXX36]": 20470, "[XXXXX1]": 20435, "[CLS]": 20430, "[XXXXX4]": 20438, "[XXXXX25]": 20459, "[XXXXX32]": 20466, "[XXXXX20]": 20454, "[XXXXX37]": 20471}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[XXXXX0]", "[XXXXX1]", "[XXXXX2]", "[XXXXX3]", "[XXXXX4]", "[XXXXX5]", "[XXXXX6]", "[XXXXX7]", "[XXXXX8]", "[XXXXX9]", "[XXXXX10]", "[XXXXX11]", "[XXXXX12]", "[XXXXX13]", "[XXXXX14]", "[XXXXX15]", "[XXXXX16]", "[XXXXX17]", "[XXXXX18]", "[XXXXX19]", "[XXXXX20]", "[XXXXX21]", "[XXXXX22]", "[XXXXX23]", "[XXXXX24]", "[XXXXX25]", "[XXXXX26]", "[XXXXX27]", "[XXXXX28]", "[XXXXX29]", "[XXXXX30]", "[XXXXX31]", "[XXXXX32]", "[XXXXX33]", "[XXXXX34]", "[XXXXX35]", "[XXXXX36]", "[XXXXX37]", "[XXXXX38]", "[XXXXX39]", "[XXXXX40]", "[XXXXX41]", "[XXXXX42]", "[XXXXX43]", "[XXXXX44]", "[XXXXX45]"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82f0b3c74356ba96f472480f5a9edd5b84c9ccdd2f1a7dd2963ec19207bb9597
3
+ size 574745
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "__type": "AddedToken"}, "sp_model_kwargs": {}, "name_or_path": "models/5mb/nzi_latn_5mb", "model_input_names": ["input_ids", "attention_mask"], "special_tokens_map_file": "models/5mb/nzi_latn_5mb/special_tokens_map.json", "tokenizer_class": "AlbertTokenizer"}