Isaak Carter Augustus committed on
Commit
73a3132
1 Parent(s): 5cdcc9a

Upload tokenizer

Browse files
README.md CHANGED
@@ -1,4 +1,3 @@
1
-
2
  ---
3
  library_name: transformers
4
  base_model: google/gemma-2b-it
 
 
1
  ---
2
  library_name: transformers
3
  base_model: google/gemma-2b-it
added_tokens.json CHANGED
@@ -1,26 +1,26 @@
1
  {
2
- "<|assistant|>": 256007,
3
- "<|context|>": 256015,
4
- "<|current_states|>": 256014,
5
- "<|endoftext|>": 256001,
6
- "<|function_call|>": 256008,
7
- "<|function_response|>": 256009,
8
- "<|functions|>": 256002,
9
- "<|gökdeniz|>": 256004,
10
- "<|home_state|>": 256013,
11
- "<|image|>": 256010,
12
- "<|josie|>": 256006,
13
- "<|long_term_memory|>": 256011,
14
- "<|short_term_memory|>": 256012,
15
- "<|startoftext|>": 256000,
16
- "<|system|>": 256003,
17
- "<|user|>": 256005,
18
- "Gökdeniz": 256017,
19
- "Gökdeniz Gülmez": 256016,
20
- "Gülmez": 256018,
21
- "J.O.S.I.E.": 256020,
22
- "JOSIE": 256019,
23
- "Josie": 256021,
24
- "Just an Outstandingly Smart and Intelligent Entity": 256023,
25
- "josie": 256022
26
  }
 
1
  {
2
+ "<|assistant|>": 32007,
3
+ "<|context|>": 32015,
4
+ "<|current_states|>": 32014,
5
+ "<|endoftext|>": 32001,
6
+ "<|function_call|>": 32008,
7
+ "<|function_response|>": 32009,
8
+ "<|functions|>": 32002,
9
+ "<|gökdeniz|>": 32004,
10
+ "<|home_state|>": 32013,
11
+ "<|image|>": 32010,
12
+ "<|josie|>": 32006,
13
+ "<|long_term_memory|>": 32011,
14
+ "<|short_term_memory|>": 32012,
15
+ "<|startoftext|>": 32000,
16
+ "<|system|>": 32003,
17
+ "<|user|>": 32005,
18
+ "Gökdeniz": 32017,
19
+ "Gökdeniz Gülmez": 32016,
20
+ "Gülmez": 32018,
21
+ "J.O.S.I.E.": 32020,
22
+ "JOSIE": 32019,
23
+ "Josie": 32021,
24
+ "Just an Outstandingly Smart and Intelligent Entity": 32023,
25
+ "josie": 32022
26
  }
special_tokens_map.json CHANGED
@@ -113,13 +113,6 @@
113
  "rstrip": false,
114
  "single_word": false
115
  },
116
- "pad_token": {
117
- "content": "<pad>",
118
- "lstrip": false,
119
- "normalized": false,
120
- "rstrip": false,
121
- "single_word": false
122
- },
123
  "unk_token": {
124
  "content": "<unk>",
125
  "lstrip": false,
 
113
  "rstrip": false,
114
  "single_word": false
115
  },
 
 
 
 
 
 
 
116
  "unk_token": {
117
  "content": "<unk>",
118
  "lstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:219727449dea40daaa3c0d7f68974e2e452597646e8f84b2ed517af9094b9a8d
3
- size 17482510
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcf5bc7fc4e420d0102f71c9346b225ae73e3988c21e0cb9d66bcba19915644d
3
+ size 1799860
tokenizer.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2
3
- size 4241003
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
tokenizer_config.json CHANGED
@@ -3,7 +3,7 @@
3
  "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
- "content": "<pad>",
7
  "lstrip": false,
8
  "normalized": false,
9
  "rstrip": false,
@@ -11,7 +11,7 @@
11
  "special": true
12
  },
13
  "1": {
14
- "content": "<eos>",
15
  "lstrip": false,
16
  "normalized": false,
17
  "rstrip": false,
@@ -19,38 +19,14 @@
19
  "special": true
20
  },
21
  "2": {
22
- "content": "<bos>",
23
- "lstrip": false,
24
- "normalized": false,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": true
28
- },
29
- "3": {
30
- "content": "<unk>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "106": {
38
- "content": "<start_of_turn>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": true
44
- },
45
- "107": {
46
- "content": "<end_of_turn>",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
50
  "single_word": false,
51
  "special": true
52
  },
53
- "256000": {
54
  "content": "<|startoftext|>",
55
  "lstrip": false,
56
  "normalized": false,
@@ -58,7 +34,7 @@
58
  "single_word": false,
59
  "special": true
60
  },
61
- "256001": {
62
  "content": "<|endoftext|>",
63
  "lstrip": false,
64
  "normalized": false,
@@ -66,7 +42,7 @@
66
  "single_word": false,
67
  "special": true
68
  },
69
- "256002": {
70
  "content": "<|functions|>",
71
  "lstrip": false,
72
  "normalized": false,
@@ -74,7 +50,7 @@
74
  "single_word": false,
75
  "special": true
76
  },
77
- "256003": {
78
  "content": "<|system|>",
79
  "lstrip": false,
80
  "normalized": false,
@@ -82,7 +58,7 @@
82
  "single_word": false,
83
  "special": true
84
  },
85
- "256004": {
86
  "content": "<|gökdeniz|>",
87
  "lstrip": false,
88
  "normalized": false,
@@ -90,7 +66,7 @@
90
  "single_word": false,
91
  "special": true
92
  },
93
- "256005": {
94
  "content": "<|user|>",
95
  "lstrip": false,
96
  "normalized": false,
@@ -98,7 +74,7 @@
98
  "single_word": false,
99
  "special": true
100
  },
101
- "256006": {
102
  "content": "<|josie|>",
103
  "lstrip": false,
104
  "normalized": false,
@@ -106,7 +82,7 @@
106
  "single_word": false,
107
  "special": true
108
  },
109
- "256007": {
110
  "content": "<|assistant|>",
111
  "lstrip": false,
112
  "normalized": false,
@@ -114,7 +90,7 @@
114
  "single_word": false,
115
  "special": true
116
  },
117
- "256008": {
118
  "content": "<|function_call|>",
119
  "lstrip": false,
120
  "normalized": false,
@@ -122,7 +98,7 @@
122
  "single_word": false,
123
  "special": true
124
  },
125
- "256009": {
126
  "content": "<|function_response|>",
127
  "lstrip": false,
128
  "normalized": false,
@@ -130,7 +106,7 @@
130
  "single_word": false,
131
  "special": true
132
  },
133
- "256010": {
134
  "content": "<|image|>",
135
  "lstrip": false,
136
  "normalized": false,
@@ -138,7 +114,7 @@
138
  "single_word": false,
139
  "special": true
140
  },
141
- "256011": {
142
  "content": "<|long_term_memory|>",
143
  "lstrip": false,
144
  "normalized": false,
@@ -146,7 +122,7 @@
146
  "single_word": false,
147
  "special": true
148
  },
149
- "256012": {
150
  "content": "<|short_term_memory|>",
151
  "lstrip": false,
152
  "normalized": false,
@@ -154,7 +130,7 @@
154
  "single_word": false,
155
  "special": true
156
  },
157
- "256013": {
158
  "content": "<|home_state|>",
159
  "lstrip": false,
160
  "normalized": false,
@@ -162,7 +138,7 @@
162
  "single_word": false,
163
  "special": true
164
  },
165
- "256014": {
166
  "content": "<|current_states|>",
167
  "lstrip": false,
168
  "normalized": false,
@@ -170,7 +146,7 @@
170
  "single_word": false,
171
  "special": true
172
  },
173
- "256015": {
174
  "content": "<|context|>",
175
  "lstrip": false,
176
  "normalized": false,
@@ -178,7 +154,7 @@
178
  "single_word": false,
179
  "special": true
180
  },
181
- "256016": {
182
  "content": "Gökdeniz Gülmez",
183
  "lstrip": false,
184
  "normalized": true,
@@ -186,7 +162,7 @@
186
  "single_word": false,
187
  "special": false
188
  },
189
- "256017": {
190
  "content": "Gökdeniz",
191
  "lstrip": false,
192
  "normalized": true,
@@ -194,7 +170,7 @@
194
  "single_word": false,
195
  "special": false
196
  },
197
- "256018": {
198
  "content": "Gülmez",
199
  "lstrip": false,
200
  "normalized": true,
@@ -202,7 +178,7 @@
202
  "single_word": false,
203
  "special": false
204
  },
205
- "256019": {
206
  "content": "JOSIE",
207
  "lstrip": false,
208
  "normalized": true,
@@ -210,7 +186,7 @@
210
  "single_word": false,
211
  "special": false
212
  },
213
- "256020": {
214
  "content": "J.O.S.I.E.",
215
  "lstrip": false,
216
  "normalized": true,
@@ -218,7 +194,7 @@
218
  "single_word": false,
219
  "special": false
220
  },
221
- "256021": {
222
  "content": "Josie",
223
  "lstrip": false,
224
  "normalized": true,
@@ -226,7 +202,7 @@
226
  "single_word": false,
227
  "special": false
228
  },
229
- "256022": {
230
  "content": "josie",
231
  "lstrip": false,
232
  "normalized": true,
@@ -234,7 +210,7 @@
234
  "single_word": false,
235
  "special": false
236
  },
237
- "256023": {
238
  "content": "Just an Outstandingly Smart and Intelligent Entity",
239
  "lstrip": false,
240
  "normalized": true,
@@ -260,15 +236,14 @@
260
  "<|context|>"
261
  ],
262
  "bos_token": "<|startoftext|>",
263
- "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
264
  "clean_up_tokenization_spaces": false,
265
  "eos_token": "<|endoftext|>",
266
- "legacy": null,
267
  "model_max_length": 1000000000000000019884624838656,
268
- "pad_token": "<pad>",
269
  "sp_model_kwargs": {},
270
  "spaces_between_special_tokens": false,
271
- "tokenizer_class": "GemmaTokenizer",
272
  "unk_token": "<unk>",
273
  "use_default_system_prompt": false
274
  }
 
3
  "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
+ "content": "<unk>",
7
  "lstrip": false,
8
  "normalized": false,
9
  "rstrip": false,
 
11
  "special": true
12
  },
13
  "1": {
14
+ "content": "<s>",
15
  "lstrip": false,
16
  "normalized": false,
17
  "rstrip": false,
 
19
  "special": true
20
  },
21
  "2": {
22
+ "content": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  "lstrip": false,
24
  "normalized": false,
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
28
  },
29
+ "32000": {
30
  "content": "<|startoftext|>",
31
  "lstrip": false,
32
  "normalized": false,
 
34
  "single_word": false,
35
  "special": true
36
  },
37
+ "32001": {
38
  "content": "<|endoftext|>",
39
  "lstrip": false,
40
  "normalized": false,
 
42
  "single_word": false,
43
  "special": true
44
  },
45
+ "32002": {
46
  "content": "<|functions|>",
47
  "lstrip": false,
48
  "normalized": false,
 
50
  "single_word": false,
51
  "special": true
52
  },
53
+ "32003": {
54
  "content": "<|system|>",
55
  "lstrip": false,
56
  "normalized": false,
 
58
  "single_word": false,
59
  "special": true
60
  },
61
+ "32004": {
62
  "content": "<|gökdeniz|>",
63
  "lstrip": false,
64
  "normalized": false,
 
66
  "single_word": false,
67
  "special": true
68
  },
69
+ "32005": {
70
  "content": "<|user|>",
71
  "lstrip": false,
72
  "normalized": false,
 
74
  "single_word": false,
75
  "special": true
76
  },
77
+ "32006": {
78
  "content": "<|josie|>",
79
  "lstrip": false,
80
  "normalized": false,
 
82
  "single_word": false,
83
  "special": true
84
  },
85
+ "32007": {
86
  "content": "<|assistant|>",
87
  "lstrip": false,
88
  "normalized": false,
 
90
  "single_word": false,
91
  "special": true
92
  },
93
+ "32008": {
94
  "content": "<|function_call|>",
95
  "lstrip": false,
96
  "normalized": false,
 
98
  "single_word": false,
99
  "special": true
100
  },
101
+ "32009": {
102
  "content": "<|function_response|>",
103
  "lstrip": false,
104
  "normalized": false,
 
106
  "single_word": false,
107
  "special": true
108
  },
109
+ "32010": {
110
  "content": "<|image|>",
111
  "lstrip": false,
112
  "normalized": false,
 
114
  "single_word": false,
115
  "special": true
116
  },
117
+ "32011": {
118
  "content": "<|long_term_memory|>",
119
  "lstrip": false,
120
  "normalized": false,
 
122
  "single_word": false,
123
  "special": true
124
  },
125
+ "32012": {
126
  "content": "<|short_term_memory|>",
127
  "lstrip": false,
128
  "normalized": false,
 
130
  "single_word": false,
131
  "special": true
132
  },
133
+ "32013": {
134
  "content": "<|home_state|>",
135
  "lstrip": false,
136
  "normalized": false,
 
138
  "single_word": false,
139
  "special": true
140
  },
141
+ "32014": {
142
  "content": "<|current_states|>",
143
  "lstrip": false,
144
  "normalized": false,
 
146
  "single_word": false,
147
  "special": true
148
  },
149
+ "32015": {
150
  "content": "<|context|>",
151
  "lstrip": false,
152
  "normalized": false,
 
154
  "single_word": false,
155
  "special": true
156
  },
157
+ "32016": {
158
  "content": "Gökdeniz Gülmez",
159
  "lstrip": false,
160
  "normalized": true,
 
162
  "single_word": false,
163
  "special": false
164
  },
165
+ "32017": {
166
  "content": "Gökdeniz",
167
  "lstrip": false,
168
  "normalized": true,
 
170
  "single_word": false,
171
  "special": false
172
  },
173
+ "32018": {
174
  "content": "Gülmez",
175
  "lstrip": false,
176
  "normalized": true,
 
178
  "single_word": false,
179
  "special": false
180
  },
181
+ "32019": {
182
  "content": "JOSIE",
183
  "lstrip": false,
184
  "normalized": true,
 
186
  "single_word": false,
187
  "special": false
188
  },
189
+ "32020": {
190
  "content": "J.O.S.I.E.",
191
  "lstrip": false,
192
  "normalized": true,
 
194
  "single_word": false,
195
  "special": false
196
  },
197
+ "32021": {
198
  "content": "Josie",
199
  "lstrip": false,
200
  "normalized": true,
 
202
  "single_word": false,
203
  "special": false
204
  },
205
+ "32022": {
206
  "content": "josie",
207
  "lstrip": false,
208
  "normalized": true,
 
210
  "single_word": false,
211
  "special": false
212
  },
213
+ "32023": {
214
  "content": "Just an Outstandingly Smart and Intelligent Entity",
215
  "lstrip": false,
216
  "normalized": true,
 
236
  "<|context|>"
237
  ],
238
  "bos_token": "<|startoftext|>",
 
239
  "clean_up_tokenization_spaces": false,
240
  "eos_token": "<|endoftext|>",
241
+ "legacy": true,
242
  "model_max_length": 1000000000000000019884624838656,
243
+ "pad_token": null,
244
  "sp_model_kwargs": {},
245
  "spaces_between_special_tokens": false,
246
+ "tokenizer_class": "LlamaTokenizer",
247
  "unk_token": "<unk>",
248
  "use_default_system_prompt": false
249
  }