bkbj committed on
Commit
7521432
1 Parent(s): db909ce

Update tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +17 -1
tokenizer_config.json CHANGED
@@ -1,5 +1,21 @@
1
  {
2
  "do_lower_case": true,
3
  "model_type": "bert",
4
- "vocab_size": 30522
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  }
 
1
  {
2
  "do_lower_case": true,
3
  "model_type": "bert",
4
+ "vocab_size": 30522,
5
+ "special_tokens_map": {
6
+ "unk_token": "[UNK]",
7
+ "sep_token": "[SEP]",
8
+ "pad_token": "[PAD]",
9
+ "cls_token": "[CLS]",
10
+ "mask_token": "[MASK]",
11
+ "eos_token": "<|endoftext|>",
12
+ "bos_token": "<|startoftext|>",
13
+ "user_token": "<|user|>",
14
+ "assistant_token": "<|assistant|>"
15
+ },
16
+ "padding_side": "right",
17
+ "truncation_side": "right",
18
+ "max_length": 1024,
19
+ "use_fast": true,
20
+ "tokenizer_class": "BertTokenizer"
21
  }