jonathanjordan21 committed on
Commit
6a3bb6b
1 Parent(s): d402989

Upload processor

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +71 -11
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +1 -8
special_tokens_map.json CHANGED
@@ -1,19 +1,79 @@
1
  {
2
  "additional_special_tokens": [
3
- "<s_kmpsi>",
4
- "</s_kmpsi>",
5
- "<s_obat>",
6
- "</s_obat>",
7
- "<s_takaran>",
8
- "</s_takaran>",
9
- "<s_komposisi>",
10
- "</s_komposisi>",
11
- "<s_desc>",
12
- "</s_desc>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  ],
14
  "bos_token": "<s>",
15
  "cls_token": "<s>",
16
- "eos_token": "</s_kmpsi>",
17
  "mask_token": {
18
  "content": "<mask>",
19
  "lstrip": true,
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "</s_obat>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<s_obat>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "</s_takaran>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "</s_desc>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<s_desc>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "<s_takaran>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "</s_komposisi>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "<s_komposisi>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "</s_kmpsi>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "<s_kmpsi>",
68
+ "lstrip": false,
69
+ "normalized": false,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ }
73
  ],
74
  "bos_token": "<s>",
75
  "cls_token": "<s>",
76
+ "eos_token": "</s>",
77
  "mask_token": {
78
  "content": "<mask>",
79
  "lstrip": true,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -161,14 +161,7 @@
161
  "clean_up_tokenization_spaces": true,
162
  "cls_token": "<s>",
163
  "eos_token": "</s>",
164
- "mask_token": {
165
- "__type": "AddedToken",
166
- "content": "<mask>",
167
- "lstrip": true,
168
- "normalized": true,
169
- "rstrip": false,
170
- "single_word": false
171
- },
172
  "model_max_length": 1000000000000000019884624838656,
173
  "pad_token": "<pad>",
174
  "processor_class": "DonutProcessor",
 
161
  "clean_up_tokenization_spaces": true,
162
  "cls_token": "<s>",
163
  "eos_token": "</s>",
164
+ "mask_token": "<mask>",
 
 
 
 
 
 
 
165
  "model_max_length": 1000000000000000019884624838656,
166
  "pad_token": "<pad>",
167
  "processor_class": "DonutProcessor",