AmeerH committed on
Commit
d58df8e
1 Parent(s): ede4450

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -13,7 +13,6 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "<|endoftext|>",
17
  "unk_token": {
18
  "content": "<|endoftext|>",
19
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
 
16
  "unk_token": {
17
  "content": "<|endoftext|>",
18
  "lstrip": false,
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -11,13 +11,33 @@
11
  "special": true
12
  }
13
  },
14
- "bos_token": "<|endoftext|>",
 
 
 
 
 
 
 
15
  "clean_up_tokenization_spaces": true,
16
- "eos_token": "<|endoftext|>",
 
 
 
 
 
 
 
17
  "errors": "replace",
18
  "model_max_length": 1024,
19
- "pad_token": "<|endoftext|>",
20
  "tokenizer_class": "GPT2Tokenizer",
21
- "unk_token": "<|endoftext|>",
22
- "use_cache": false
 
 
 
 
 
 
23
  }
 
11
  "special": true
12
  }
13
  },
14
+ "bos_token": {
15
+ "__type": "AddedToken",
16
+ "content": "<|endoftext|>",
17
+ "lstrip": false,
18
+ "normalized": true,
19
+ "rstrip": false,
20
+ "single_word": false
21
+ },
22
  "clean_up_tokenization_spaces": true,
23
+ "eos_token": {
24
+ "__type": "AddedToken",
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
  "errors": "replace",
32
  "model_max_length": 1024,
33
+ "pad_token": null,
34
  "tokenizer_class": "GPT2Tokenizer",
35
+ "unk_token": {
36
+ "__type": "AddedToken",
37
+ "content": "<|endoftext|>",
38
+ "lstrip": false,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false
42
+ }
43
  }
vocab.json CHANGED
The diff for this file is too large to render. See raw diff