aaniket committed on
Commit
7c79569
1 Parent(s): 3a69fc1

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +63 -74
vocab.json CHANGED
@@ -1,76 +1,65 @@
1
  {
2
- "'": 11,
3
- "[PAD]": 73,
4
- "[UNK]": 72,
5
- "a": 42,
6
- "e": 41,
7
- "f": 21,
8
- "i": 35,
9
- "l": 65,
10
- "m": 23,
11
- "p": 53,
12
- "r": 39,
13
- "u": 49,
14
- "w": 10,
15
- "|": 0,
16
- "": 69,
17
- "": 5,
18
- "": 45,
19
- "": 48,
20
- "": 18,
21
- "": 34,
22
- "": 68,
23
- "": 9,
24
- "": 22,
25
- "": 24,
26
- "": 58,
27
- "": 15,
28
- "": 70,
29
- "": 8,
30
- "": 26,
31
- "": 28,
32
- "": 12,
33
- "": 66,
34
- "": 29,
35
- "": 59,
36
- "": 64,
37
- "": 56,
38
- "": 17,
39
- "": 3,
40
- "": 16,
41
- "": 4,
42
- "": 60,
43
- "": 6,
44
- "": 13,
45
- "": 32,
46
- "": 2,
47
- "": 71,
48
- "": 55,
49
- "": 1,
50
- "": 19,
51
- "": 50,
52
- "": 20,
53
- "": 62,
54
- "": 47,
55
- "": 51,
56
- "": 44,
57
- "": 54,
58
- "": 36,
59
- "": 57,
60
- "": 30,
61
- "": 27,
62
- "": 52,
63
- "ि": 38,
64
- "": 67,
65
- "ु": 43,
66
- "ू": 63,
67
- "ृ": 61,
68
- "े": 37,
69
- "ै": 14,
70
- "ॉ": 25,
71
- "ो": 7,
72
- "ौ": 40,
73
- "्": 31,
74
- "ड़": 33,
75
- "।": 46
76
  }
 
1
  {
2
+ "[PAD]": 62,
3
+ "[UNK]": 61,
4
+ "aa": 47,
5
+ "ae": 29,
6
+ "ah": 44,
7
+ "ao": 37,
8
+ "aw": 30,
9
+ "ax": 41,
10
+ "ax-h": 33,
11
+ "axr": 13,
12
+ "ay": 12,
13
+ "b": 55,
14
+ "bcl": 56,
15
+ "ch": 46,
16
+ "d": 27,
17
+ "dcl": 2,
18
+ "dh": 17,
19
+ "dx": 0,
20
+ "eh": 1,
21
+ "el": 8,
22
+ "em": 39,
23
+ "en": 40,
24
+ "eng": 54,
25
+ "epi": 59,
26
+ "er": 5,
27
+ "ey": 60,
28
+ "f": 53,
29
+ "g": 45,
30
+ "gcl": 6,
31
+ "h#": 14,
32
+ "hh": 31,
33
+ "hv": 51,
34
+ "ih": 16,
35
+ "ix": 9,
36
+ "iy": 25,
37
+ "jh": 3,
38
+ "k": 48,
39
+ "kcl": 28,
40
+ "l": 23,
41
+ "m": 4,
42
+ "n": 38,
43
+ "ng": 21,
44
+ "nx": 49,
45
+ "ow": 35,
46
+ "oy": 24,
47
+ "p": 36,
48
+ "pau": 22,
49
+ "pcl": 34,
50
+ "q": 58,
51
+ "r": 42,
52
+ "s": 26,
53
+ "sh": 11,
54
+ "t": 18,
55
+ "tcl": 7,
56
+ "th": 43,
57
+ "uh": 57,
58
+ "uw": 10,
59
+ "ux": 15,
60
+ "v": 19,
61
+ "w": 20,
62
+ "y": 50,
63
+ "z": 52,
64
+ "zh": 32
 
 
 
 
 
 
 
 
 
 
 
65
  }