Abhaykoul commited on
Commit
52fdb44
1 Parent(s): 7ebd09f

Upload tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +208 -210
tokenizer_config.json CHANGED
@@ -1,211 +1,209 @@
1
  {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
- "added_tokens_decoder": {
5
- "32000": {
6
- "content": "õ",
7
- "lstrip": false,
8
- "normalized": true,
9
- "rstrip": false,
10
- "single_word": false,
11
- "special": false
12
- },
13
- "32001": {
14
- "content": "÷",
15
- "lstrip": false,
16
- "normalized": true,
17
- "rstrip": false,
18
- "single_word": false,
19
- "special": false
20
- },
21
- "32002": {
22
- "content": "Á",
23
- "lstrip": false,
24
- "normalized": true,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": false
28
- },
29
- "32003": {
30
- "content": "ý",
31
- "lstrip": false,
32
- "normalized": true,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": false
36
- },
37
- "32004": {
38
- "content": "À",
39
- "lstrip": false,
40
- "normalized": true,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": false
44
- },
45
- "32005": {
46
- "content": "ÿ",
47
- "lstrip": false,
48
- "normalized": true,
49
- "rstrip": false,
50
- "single_word": false,
51
- "special": false
52
- },
53
- "32006": {
54
- "content": "ø",
55
- "lstrip": false,
56
- "normalized": true,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": false
60
- },
61
- "32007": {
62
- "content": "ú",
63
- "lstrip": false,
64
- "normalized": true,
65
- "rstrip": false,
66
- "single_word": false,
67
- "special": false
68
- },
69
- "32008": {
70
- "content": "þ",
71
- "lstrip": false,
72
- "normalized": true,
73
- "rstrip": false,
74
- "single_word": false,
75
- "special": false
76
- },
77
- "32009": {
78
- "content": "ü",
79
- "lstrip": false,
80
- "normalized": true,
81
- "rstrip": false,
82
- "single_word": false,
83
- "special": false
84
- },
85
- "32010": {
86
- "content": "ù",
87
- "lstrip": false,
88
- "normalized": true,
89
- "rstrip": false,
90
- "single_word": false,
91
- "special": false
92
- },
93
- "32011": {
94
- "content": "ö",
95
- "lstrip": false,
96
- "normalized": true,
97
- "rstrip": false,
98
- "single_word": false,
99
- "special": false
100
- },
101
- "32012": {
102
- "content": "û",
103
- "lstrip": false,
104
- "normalized": true,
105
- "rstrip": false,
106
- "single_word": false,
107
- "special": false
108
- },
109
- "32013": {
110
- "content": "<|begin▁of▁sentence|>",
111
- "lstrip": false,
112
- "normalized": true,
113
- "rstrip": false,
114
- "single_word": false,
115
- "special": true
116
- },
117
- "32014": {
118
- "content": "<|end▁of▁sentence|>",
119
- "lstrip": false,
120
- "normalized": true,
121
- "rstrip": false,
122
- "single_word": false,
123
- "special": true
124
- },
125
- "32015": {
126
- "content": "<|fim▁hole|>",
127
- "lstrip": false,
128
- "normalized": true,
129
- "rstrip": false,
130
- "single_word": false,
131
- "special": false
132
- },
133
- "32016": {
134
- "content": "<|fim▁begin|>",
135
- "lstrip": false,
136
- "normalized": true,
137
- "rstrip": false,
138
- "single_word": false,
139
- "special": false
140
- },
141
- "32017": {
142
- "content": "<|fim▁end|>",
143
- "lstrip": false,
144
- "normalized": true,
145
- "rstrip": false,
146
- "single_word": false,
147
- "special": false
148
- },
149
- "32018": {
150
- "content": "<pad>",
151
- "lstrip": false,
152
- "normalized": true,
153
- "rstrip": false,
154
- "single_word": false,
155
- "special": false
156
- },
157
- "32019": {
158
- "content": "<|User|>",
159
- "lstrip": false,
160
- "normalized": true,
161
- "rstrip": false,
162
- "single_word": false,
163
- "special": false
164
- },
165
- "32020": {
166
- "content": "<|Assistant|>",
167
- "lstrip": false,
168
- "normalized": true,
169
- "rstrip": false,
170
- "single_word": false,
171
- "special": false
172
- },
173
- "32021": {
174
- "content": "<|EOT|>",
175
- "lstrip": false,
176
- "normalized": true,
177
- "rstrip": false,
178
- "single_word": false,
179
- "special": false
180
- },
181
- "32022": {
182
- "content": "<API_RUN_START>",
183
- "lstrip": false,
184
- "normalized": true,
185
- "rstrip": false,
186
- "single_word": false,
187
- "special": false
188
- },
189
- "32023": {
190
- "content": "<API_RUN_STOP>",
191
- "lstrip": false,
192
- "normalized": true,
193
- "rstrip": false,
194
- "single_word": false,
195
- "special": false
196
- }
197
- },
198
- "bos_token": "<|begin▁of▁sentence|>",
199
- "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message + '\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\\nAssistant: ' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end▁of▁sentence|>' + '\\n' }}{% endif %}{% endfor %}",
200
- "clean_up_tokenization_spaces": false,
201
- "eos_token": "<|EOT|>",
202
- "legacy": true,
203
- "model_max_length": 16384,
204
- "pad_token": "<|end▁of▁sentence|>",
205
- "padding_side": "right",
206
- "sp_model_kwargs": {},
207
- "split_special_tokens": false,
208
- "tokenizer_class": "LlamaTokenizer",
209
- "unk_token": null,
210
- "use_default_system_prompt": false
211
- }
 
1
  {
2
+ "added_tokens_decoder": {
3
+ "32000": {
4
+ "content": "\u00f5",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "32001": {
12
+ "content": "\u00f7",
13
+ "lstrip": false,
14
+ "normalized": true,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "32002": {
20
+ "content": "\u00c1",
21
+ "lstrip": false,
22
+ "normalized": true,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "32003": {
28
+ "content": "\u00fd",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "32004": {
36
+ "content": "\u00c0",
37
+ "lstrip": false,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": false
42
+ },
43
+ "32005": {
44
+ "content": "\u00ff",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "32006": {
52
+ "content": "\u00f8",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "32007": {
60
+ "content": "\u00fa",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "32008": {
68
+ "content": "\u00fe",
69
+ "lstrip": false,
70
+ "normalized": true,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "32009": {
76
+ "content": "\u00fc",
77
+ "lstrip": false,
78
+ "normalized": true,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "32010": {
84
+ "content": "\u00f9",
85
+ "lstrip": false,
86
+ "normalized": true,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "32011": {
92
+ "content": "\u00f6",
93
+ "lstrip": false,
94
+ "normalized": true,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "32012": {
100
+ "content": "\u00fb",
101
+ "lstrip": false,
102
+ "normalized": true,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "32013": {
108
+ "content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
109
+ "lstrip": false,
110
+ "normalized": true,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32014": {
116
+ "content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
117
+ "lstrip": false,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32015": {
124
+ "content": "<\uff5cfim\u2581hole\uff5c>",
125
+ "lstrip": false,
126
+ "normalized": true,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "32016": {
132
+ "content": "<\uff5cfim\u2581begin\uff5c>",
133
+ "lstrip": false,
134
+ "normalized": true,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "32017": {
140
+ "content": "<\uff5cfim\u2581end\uff5c>",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "32018": {
148
+ "content": "<pad>",
149
+ "lstrip": false,
150
+ "normalized": true,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "32019": {
156
+ "content": "<|User|>",
157
+ "lstrip": false,
158
+ "normalized": true,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": false
162
+ },
163
+ "32020": {
164
+ "content": "<|Assistant|>",
165
+ "lstrip": false,
166
+ "normalized": true,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": false
170
+ },
171
+ "32021": {
172
+ "content": "<|EOT|>",
173
+ "lstrip": false,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32022": {
180
+ "content": "<API_RUN_START>",
181
+ "lstrip": false,
182
+ "normalized": true,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": false
186
+ },
187
+ "32023": {
188
+ "content": "<API_RUN_STOP>",
189
+ "lstrip": false,
190
+ "normalized": true,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": false
194
+ }
195
+ },
196
+ "bos_token": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
197
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}",
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|EOT|>",
200
+ "legacy": true,
201
+ "model_max_length": 16384,
202
+ "pad_token": "<\uff5cend\u2581of\u2581sentence\uff5c>",
203
+ "padding_side": "right",
204
+ "sp_model_kwargs": {},
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "LlamaTokenizer",
207
+ "unk_token": null,
208
+ "use_default_system_prompt": false
209
+ }