ad019el committed on
Commit
582d35f
1 Parent(s): 3a1d4ee

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +7 -8
vocab.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "[PAD]": 44,
3
- "[UNK]": 43,
4
  "|": 10,
5
  "ء": 18,
6
  "آ": 21,
@@ -11,7 +11,7 @@
11
  "ا": 14,
12
  "ب": 30,
13
  "ة": 2,
14
- "ت": 41,
15
  "ث": 31,
16
  "ج": 26,
17
  "ح": 7,
@@ -29,19 +29,18 @@
29
  "ع": 11,
30
  "غ": 32,
31
  "ف": 34,
32
- "ق": 39,
33
- "ك": 42,
34
  "ل": 4,
35
  "م": 25,
36
  "ن": 24,
37
  "ه": 22,
38
- "و": 40,
39
  "ى": 19,
40
  "ي": 15,
41
  "ٱ": 12,
42
  "چ": 37,
43
  "ڤ": 28,
44
  "ک": 6,
45
- "ی": 17,
46
- "’": 38
47
  }
 
1
  {
2
+ "[PAD]": 43,
3
+ "[UNK]": 42,
4
  "|": 10,
5
  "ء": 18,
6
  "آ": 21,
 
11
  "ا": 14,
12
  "ب": 30,
13
  "ة": 2,
14
+ "ت": 40,
15
  "ث": 31,
16
  "ج": 26,
17
  "ح": 7,
 
29
  "ع": 11,
30
  "غ": 32,
31
  "ف": 34,
32
+ "ق": 38,
33
+ "ك": 41,
34
  "ل": 4,
35
  "م": 25,
36
  "ن": 24,
37
  "ه": 22,
38
+ "و": 39,
39
  "ى": 19,
40
  "ي": 15,
41
  "ٱ": 12,
42
  "چ": 37,
43
  "ڤ": 28,
44
  "ک": 6,
45
+ "ی": 17
 
46
  }