arxyzan committed on
Commit
1705edb
1 Parent(s): a1b901f

Hezar: Upload tokenizer_config.yaml

Browse files
Files changed (1) hide show
  1. preprocessor/tokenizer_config.yaml +12 -0
preprocessor/tokenizer_config.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: tokenizer
2
+ config_type: preprocessor
3
+ model: wordpiece
4
+ max_length: 512
5
+ truncation_strategy: longest_first
6
+ truncation_direction: right
7
+ stride: 0
8
+ padding_strategy: longest
9
+ padding_direction: right
10
+ pad_token_id: 0
11
+ pad_token: '[PAD]'
12
+ pad_token_type_id: 0