File size: 647 Bytes
2b2d98f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Tokenizer preprocessor configuration.
# Every string value is single-quoted so it is always loaded as a string;
# integers, booleans and the empty list are left as native YAML types.

# Identity
name: 'sentencepiece_unigram_tokenizer'
config_type: 'preprocessor'
pretrained_path: 't5-base-fa'

# Truncation
max_length: 512
truncation_strategy: 'longest_first'
truncation_direction: 'right'
stride: 0

# Padding
padding_strategy: 'longest'
padding_direction: 'right'
pad_to_multiple_of: 0
pad_token_id: 0
pad_token: '<pad>'
pad_token_type_id: 0

# Unknown and special tokens
unk_token: '<unk>'
# NOTE(review): entry order is preserved exactly; presumably it matters to
# the consumer (e.g. for id assignment) - confirm before reordering.
special_tokens:
  - '<s>'
  - '<pad>'
  - '</s>'
  - '<unk>'
  - '<mask>'
  - '<|endoftext|>'
  - '<|startoftext|>'
  - '<nl>'
  - '<hs>'
  - '<sep>'
  - '<cls>'

# Sub-word and pre-tokenization options
continuing_subword_prefix: ''
replacement: '_'
add_prefix_space: true
end_of_word_suffix: ''
fuse_unk: false

# Vocabulary / training options
vocab_size: 32103
min_frequency: 2
limit_alphabet: 1000
initial_alphabet: []
show_progress: true