add tokenizer
Browse files- vocab.json +7 -8
vocab.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
-
"[PAD]":
|
3 |
-
"[UNK]":
|
4 |
"|": 10,
|
5 |
"ء": 18,
|
6 |
"آ": 21,
|
@@ -11,7 +11,7 @@
|
|
11 |
"ا": 14,
|
12 |
"ب": 30,
|
13 |
"ة": 2,
|
14 |
-
"ت":
|
15 |
"ث": 31,
|
16 |
"ج": 26,
|
17 |
"ح": 7,
|
@@ -29,19 +29,18 @@
|
|
29 |
"ع": 11,
|
30 |
"غ": 32,
|
31 |
"ف": 34,
|
32 |
-
"ق":
|
33 |
-
"ك":
|
34 |
"ل": 4,
|
35 |
"م": 25,
|
36 |
"ن": 24,
|
37 |
"ه": 22,
|
38 |
-
"و":
|
39 |
"ى": 19,
|
40 |
"ي": 15,
|
41 |
"ٱ": 12,
|
42 |
"چ": 37,
|
43 |
"ڤ": 28,
|
44 |
"ک": 6,
|
45 |
-
"ی": 17
|
46 |
-
"’": 38
|
47 |
}
|
|
|
1 |
{
|
2 |
+
"[PAD]": 43,
|
3 |
+
"[UNK]": 42,
|
4 |
"|": 10,
|
5 |
"ء": 18,
|
6 |
"آ": 21,
|
|
|
11 |
"ا": 14,
|
12 |
"ب": 30,
|
13 |
"ة": 2,
|
14 |
+
"ت": 40,
|
15 |
"ث": 31,
|
16 |
"ج": 26,
|
17 |
"ح": 7,
|
|
|
29 |
"ع": 11,
|
30 |
"غ": 32,
|
31 |
"ف": 34,
|
32 |
+
"ق": 38,
|
33 |
+
"ك": 41,
|
34 |
"ل": 4,
|
35 |
"م": 25,
|
36 |
"ن": 24,
|
37 |
"ه": 22,
|
38 |
+
"و": 39,
|
39 |
"ى": 19,
|
40 |
"ي": 15,
|
41 |
"ٱ": 12,
|
42 |
"چ": 37,
|
43 |
"ڤ": 28,
|
44 |
"ک": 6,
|
45 |
+
"ی": 17
|
|
|
46 |
}
|