cointegrated committed on
Commit
6d2c92e
1 Parent(s): 8df379b

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +2 -2
  2. tokenizer_config.json +16 -9
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd8f59d5f03680eb1d997c836b3b9f1eb0dec3b7cf6890a5cbb6f1ecccfc0a41
3
- size 17331547
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92d6f635dbd4537c29524186269deb45d312c324e2b9950b06743b76c02e5b55
3
+ size 17331477
tokenizer_config.json CHANGED
@@ -24,14 +24,6 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "3": {
28
- "content": "<unk>",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
  "256001": {
36
  "content": "ace_Arab",
37
  "lstrip": false,
@@ -1655,6 +1647,14 @@
1655
  "rstrip": false,
1656
  "single_word": false,
1657
  "special": true
 
 
 
 
 
 
 
 
1658
  }
1659
  },
1660
  "additional_special_tokens": [
@@ -1866,7 +1866,14 @@
1866
  "cls_token": "<s>",
1867
  "eos_token": "</s>",
1868
  "legacy_behaviour": false,
1869
- "mask_token": "<mask>",
 
 
 
 
 
 
 
1870
  "model_max_length": 1024,
1871
  "pad_token": "<pad>",
1872
  "sep_token": "</s>",
 
24
  "single_word": false,
25
  "special": true
26
  },
 
 
 
 
 
 
 
 
27
  "256001": {
28
  "content": "ace_Arab",
29
  "lstrip": false,
 
1647
  "rstrip": false,
1648
  "single_word": false,
1649
  "special": true
1650
+ },
1651
+ "3": {
1652
+ "content": "<unk>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
  }
1659
  },
1660
  "additional_special_tokens": [
 
1866
  "cls_token": "<s>",
1867
  "eos_token": "</s>",
1868
  "legacy_behaviour": false,
1869
+ "mask_token": {
1870
+ "__type": "AddedToken",
1871
+ "content": "<mask>",
1872
+ "lstrip": true,
1873
+ "normalized": true,
1874
+ "rstrip": false,
1875
+ "single_word": false
1876
+ },
1877
  "model_max_length": 1024,
1878
  "pad_token": "<pad>",
1879
  "sep_token": "</s>",