ad019el committed on
Commit
b42ca29
1 Parent(s): 322b957

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +36 -54
vocab.json CHANGED
@@ -1,68 +1,50 @@
1
  {
2
  "&": 5,
3
- "/": 29,
4
- "0": 34,
5
- "1": 38,
6
  "2": 0,
7
- "7": 43,
8
- "9": 56,
9
- "[PAD]": 65,
10
- "[UNK]": 64,
11
- "_": 13,
12
- "a": 35,
13
- "c": 58,
14
- "e": 31,
15
- "f": 26,
16
- "j": 49,
17
- "l": 40,
18
- "m": 27,
19
- "o": 41,
20
- "q": 46,
21
- "r": 12,
22
- "t": 50,
23
- "u": 15,
24
- "w": 42,
25
- "|": 14,
26
- "ء": 23,
27
- "آ": 28,
28
  "أ": 4,
29
- "ؤ": 52,
30
  "إ": 10,
31
- "ئ": 21,
32
- "ا": 19,
33
- "ب": 47,
34
  "ة": 3,
35
- "ت": 62,
36
- "ث": 48,
37
- "ج": 37,
38
  "ح": 9,
39
- "خ": 45,
40
- "د": 32,
41
- "ذ": 18,
42
  "ر": 11,
43
- "ز": 25,
44
  "س": 1,
45
- "ش": 39,
46
  "ص": 2,
47
  "ض": 7,
48
- "ط": 54,
49
- "ظ": 55,
50
- "ع": 16,
51
- "غ": 51,
52
- "ف": 53,
53
- "ق": 60,
54
- "ك": 63,
55
  "ل": 6,
56
- "م": 36,
57
- "ن": 33,
58
- "ه": 30,
59
- "و": 61,
60
- "ى": 24,
61
- "ي": 20,
62
- "ٱ": 17,
63
- "چ": 57,
64
- "ڤ": 44,
65
  "ک": 8,
66
- "ی": 22,
67
- "’": 59
68
  }
 
1
  {
2
  "&": 5,
3
+ "0": 28,
 
 
4
  "2": 0,
5
+ "[PAD]": 47,
6
+ "[UNK]": 46,
7
+ "e": 25,
8
+ "|": 12,
9
+ "ء": 20,
10
+ "آ": 23,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  "أ": 4,
12
+ "ؤ": 37,
13
  "إ": 10,
14
+ "ئ": 18,
15
+ "ا": 16,
16
+ "ب": 34,
17
  "ة": 3,
18
+ "ت": 44,
19
+ "ث": 35,
20
+ "ج": 30,
21
  "ح": 9,
22
+ "خ": 33,
23
+ "د": 26,
24
+ "ذ": 15,
25
  "ر": 11,
26
+ "ز": 22,
27
  "س": 1,
28
+ "ش": 31,
29
  "ص": 2,
30
  "ض": 7,
31
+ "ط": 39,
32
+ "ظ": 40,
33
+ "ع": 13,
34
+ "غ": 36,
35
+ "ف": 38,
36
+ "ق": 42,
37
+ "ك": 45,
38
  "ل": 6,
39
+ "م": 29,
40
+ "ن": 27,
41
+ "ه": 24,
42
+ "و": 43,
43
+ "ى": 21,
44
+ "ي": 17,
45
+ "ٱ": 14,
46
+ "چ": 41,
47
+ "ڤ": 32,
48
  "ک": 8,
49
+ "ی": 19
 
50
  }