Abhaykoul committed on
Commit
5b72292
1 Parent(s): 52fdb44
Files changed (2) hide show
  1. tokenizer.json +1 -2
  2. tokenizer_config.json +208 -208
tokenizer.json CHANGED
@@ -199,7 +199,7 @@
199
  "lstrip": false,
200
  "rstrip": false,
201
  "normalized": true,
202
- "special": false
203
  },
204
  {
205
  "id": 32022,
@@ -339,7 +339,6 @@
339
  "end_of_word_suffix": null,
340
  "fuse_unk": false,
341
  "byte_fallback": false,
342
- "ignore_merges": false,
343
  "vocab": {
344
  "!": 0,
345
  "\"": 1,
 
199
  "lstrip": false,
200
  "rstrip": false,
201
  "normalized": true,
202
+ "special": true
203
  },
204
  {
205
  "id": 32022,
 
339
  "end_of_word_suffix": null,
340
  "fuse_unk": false,
341
  "byte_fallback": false,
 
342
  "vocab": {
343
  "!": 0,
344
  "\"": 1,
tokenizer_config.json CHANGED
@@ -1,209 +1,209 @@
1
  {
2
- "added_tokens_decoder": {
3
- "32000": {
4
- "content": "\u00f5",
5
- "lstrip": false,
6
- "normalized": true,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": false
10
- },
11
- "32001": {
12
- "content": "\u00f7",
13
- "lstrip": false,
14
- "normalized": true,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": false
18
- },
19
- "32002": {
20
- "content": "\u00c1",
21
- "lstrip": false,
22
- "normalized": true,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": false
26
- },
27
- "32003": {
28
- "content": "\u00fd",
29
- "lstrip": false,
30
- "normalized": true,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": false
34
- },
35
- "32004": {
36
- "content": "\u00c0",
37
- "lstrip": false,
38
- "normalized": true,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": false
42
- },
43
- "32005": {
44
- "content": "\u00ff",
45
- "lstrip": false,
46
- "normalized": true,
47
- "rstrip": false,
48
- "single_word": false,
49
- "special": false
50
- },
51
- "32006": {
52
- "content": "\u00f8",
53
- "lstrip": false,
54
- "normalized": true,
55
- "rstrip": false,
56
- "single_word": false,
57
- "special": false
58
- },
59
- "32007": {
60
- "content": "\u00fa",
61
- "lstrip": false,
62
- "normalized": true,
63
- "rstrip": false,
64
- "single_word": false,
65
- "special": false
66
- },
67
- "32008": {
68
- "content": "\u00fe",
69
- "lstrip": false,
70
- "normalized": true,
71
- "rstrip": false,
72
- "single_word": false,
73
- "special": false
74
- },
75
- "32009": {
76
- "content": "\u00fc",
77
- "lstrip": false,
78
- "normalized": true,
79
- "rstrip": false,
80
- "single_word": false,
81
- "special": false
82
- },
83
- "32010": {
84
- "content": "\u00f9",
85
- "lstrip": false,
86
- "normalized": true,
87
- "rstrip": false,
88
- "single_word": false,
89
- "special": false
90
- },
91
- "32011": {
92
- "content": "\u00f6",
93
- "lstrip": false,
94
- "normalized": true,
95
- "rstrip": false,
96
- "single_word": false,
97
- "special": false
98
- },
99
- "32012": {
100
- "content": "\u00fb",
101
- "lstrip": false,
102
- "normalized": true,
103
- "rstrip": false,
104
- "single_word": false,
105
- "special": false
106
- },
107
- "32013": {
108
- "content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
109
- "lstrip": false,
110
- "normalized": true,
111
- "rstrip": false,
112
- "single_word": false,
113
- "special": true
114
- },
115
- "32014": {
116
- "content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
117
- "lstrip": false,
118
- "normalized": true,
119
- "rstrip": false,
120
- "single_word": false,
121
- "special": true
122
- },
123
- "32015": {
124
- "content": "<\uff5cfim\u2581hole\uff5c>",
125
- "lstrip": false,
126
- "normalized": true,
127
- "rstrip": false,
128
- "single_word": false,
129
- "special": false
130
- },
131
- "32016": {
132
- "content": "<\uff5cfim\u2581begin\uff5c>",
133
- "lstrip": false,
134
- "normalized": true,
135
- "rstrip": false,
136
- "single_word": false,
137
- "special": false
138
- },
139
- "32017": {
140
- "content": "<\uff5cfim\u2581end\uff5c>",
141
- "lstrip": false,
142
- "normalized": true,
143
- "rstrip": false,
144
- "single_word": false,
145
- "special": false
146
- },
147
- "32018": {
148
- "content": "<pad>",
149
- "lstrip": false,
150
- "normalized": true,
151
- "rstrip": false,
152
- "single_word": false,
153
- "special": false
154
- },
155
- "32019": {
156
- "content": "<|User|>",
157
- "lstrip": false,
158
- "normalized": true,
159
- "rstrip": false,
160
- "single_word": false,
161
- "special": false
162
- },
163
- "32020": {
164
- "content": "<|Assistant|>",
165
- "lstrip": false,
166
- "normalized": true,
167
- "rstrip": false,
168
- "single_word": false,
169
- "special": false
170
- },
171
- "32021": {
172
- "content": "<|EOT|>",
173
- "lstrip": false,
174
- "normalized": true,
175
- "rstrip": false,
176
- "single_word": false,
177
- "special": true
178
- },
179
- "32022": {
180
- "content": "<API_RUN_START>",
181
- "lstrip": false,
182
- "normalized": true,
183
- "rstrip": false,
184
- "single_word": false,
185
- "special": false
186
- },
187
- "32023": {
188
- "content": "<API_RUN_STOP>",
189
- "lstrip": false,
190
- "normalized": true,
191
- "rstrip": false,
192
- "single_word": false,
193
- "special": false
194
- }
195
- },
196
- "bos_token": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
197
- "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}",
198
- "clean_up_tokenization_spaces": false,
199
- "eos_token": "<|EOT|>",
200
- "legacy": true,
201
- "model_max_length": 16384,
202
- "pad_token": "<\uff5cend\u2581of\u2581sentence\uff5c>",
203
- "padding_side": "right",
204
- "sp_model_kwargs": {},
205
- "split_special_tokens": false,
206
- "tokenizer_class": "LlamaTokenizer",
207
- "unk_token": null,
208
- "use_default_system_prompt": false
209
- }
 
1
  {
2
+ "added_tokens_decoder": {
3
+ "32000": {
4
+ "content": "õ",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "32001": {
12
+ "content": "÷",
13
+ "lstrip": false,
14
+ "normalized": true,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "32002": {
20
+ "content": "Á",
21
+ "lstrip": false,
22
+ "normalized": true,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "32003": {
28
+ "content": "ý",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "32004": {
36
+ "content": "À",
37
+ "lstrip": false,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": false
42
+ },
43
+ "32005": {
44
+ "content": "ÿ",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "32006": {
52
+ "content": "ø",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "32007": {
60
+ "content": "ú",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "32008": {
68
+ "content": "þ",
69
+ "lstrip": false,
70
+ "normalized": true,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "32009": {
76
+ "content": "ü",
77
+ "lstrip": false,
78
+ "normalized": true,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "32010": {
84
+ "content": "ù",
85
+ "lstrip": false,
86
+ "normalized": true,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "32011": {
92
+ "content": "ö",
93
+ "lstrip": false,
94
+ "normalized": true,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "32012": {
100
+ "content": "û",
101
+ "lstrip": false,
102
+ "normalized": true,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "32013": {
108
+ "content": "<｜begin▁of▁sentence｜>",
109
+ "lstrip": false,
110
+ "normalized": true,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32014": {
116
+ "content": "<｜end▁of▁sentence｜>",
117
+ "lstrip": false,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32015": {
124
+ "content": "<｜fim▁hole｜>",
125
+ "lstrip": false,
126
+ "normalized": true,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "32016": {
132
+ "content": "<｜fim▁begin｜>",
133
+ "lstrip": false,
134
+ "normalized": true,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "32017": {
140
+ "content": "<｜fim▁end｜>",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "32018": {
148
+ "content": "<pad>",
149
+ "lstrip": false,
150
+ "normalized": true,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "32019": {
156
+ "content": "<|User|>",
157
+ "lstrip": false,
158
+ "normalized": true,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": false
162
+ },
163
+ "32020": {
164
+ "content": "<|Assistant|>",
165
+ "lstrip": false,
166
+ "normalized": true,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": false
170
+ },
171
+ "32021": {
172
+ "content": "<|EOT|>",
173
+ "lstrip": false,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32022": {
180
+ "content": "<API_RUN_START>",
181
+ "lstrip": false,
182
+ "normalized": true,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": false
186
+ },
187
+ "32023": {
188
+ "content": "<API_RUN_STOP>",
189
+ "lstrip": false,
190
+ "normalized": true,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": false
194
+ }
195
+ },
196
+ "bos_token": "<｜begin▁of▁sentence｜>",
197
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}",
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|EOT|>",
200
+ "legacy": true,
201
+ "model_max_length": 16384,
202
+ "pad_token": "<｜end▁of▁sentence｜>",
203
+ "padding_side": "right",
204
+ "sp_model_kwargs": {},
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "LlamaTokenizer",
207
+ "unk_token": null,
208
+ "use_default_system_prompt": false
209
+ }