TheBloke committed on
Commit
d4618f3
1 Parent(s): 8bb1ffa

Add fast tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,3 +1,3 @@
1
  {
2
- "[PAD]": 32000
3
- }
 
1
  {
2
+ "[PAD]": 32000
3
+ }
special_tokens_map.json CHANGED
@@ -1,24 +1,24 @@
1
  {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": true,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "</s>",
11
- "lstrip": false,
12
- "normalized": true,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "pad_token": "[PAD]",
17
- "unk_token": {
18
- "content": "<unk>",
19
- "lstrip": false,
20
- "normalized": true,
21
- "rstrip": false,
22
- "single_word": false
23
- }
24
- }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "[PAD]",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -31,4 +31,4 @@
31
  "rstrip": false,
32
  "single_word": false
33
  }
34
- }
 
31
  "rstrip": false,
32
  "single_word": false
33
  }
34
+ }