Spanish
sapinedamo committed on
Commit
4c0a3c5
1 Parent(s): 5013372

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -1,11 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- "[PAD]",
4
- "<s>",
5
- "</s>",
6
- "<unk>",
7
- "[PAD]"
8
- ],
9
  "bos_token": "<s>",
10
  "eos_token": "</s>",
11
  "pad_token": "[PAD]",
 
1
  {
 
 
 
 
 
 
 
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
  "pad_token": "[PAD]",
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9051199beacf71b5afaf6058198951c587ac0d6702f3871b122d8bf5660d72b7
3
- size 14501256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1785bcb6a31ceabdec1237f1c8330b52b368cee31213ab9eab322f1c4e399485
3
+ size 14501099
tokenizer_config.json CHANGED
@@ -1,12 +1,11 @@
1
  {
2
  "add_prefix_space": false,
3
  "bos_token": "<s>",
 
4
  "eos_token": "</s>",
5
- "max_length": 512,
6
  "model_max_length": 1000000000000000019884624838656,
7
  "pad_token": "<pad>",
8
  "padding_side": "left",
9
- "special_tokens_map_file": null,
10
  "tokenizer_class": "BloomTokenizer",
11
  "unk_token": "<unk>"
12
  }
 
1
  {
2
  "add_prefix_space": false,
3
  "bos_token": "<s>",
4
+ "clean_up_tokenization_spaces": false,
5
  "eos_token": "</s>",
 
6
  "model_max_length": 1000000000000000019884624838656,
7
  "pad_token": "<pad>",
8
  "padding_side": "left",
 
9
  "tokenizer_class": "BloomTokenizer",
10
  "unk_token": "<unk>"
11
  }