manjugeorge committed on
Commit
19ca2c9
1 Parent(s): 2d82325

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +6 -0
  2. tokenizer_config.json +10 -0
  3. vocab.json +86 -0
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "do_lower_case": false,
4
+ "eos_token": "</s>",
5
+ "pad_token": "[PAD]",
6
+ "replace_word_delimiter_char": " ",
7
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
8
+ "unk_token": "[UNK]",
9
+ "word_delimiter_token": "|"
10
+ }
vocab.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "0": 27,
3
+ "1": 19,
4
+ "2": 1,
5
+ "3": 63,
6
+ "4": 50,
7
+ "5": 12,
8
+ "7": 18,
9
+ "9": 76,
10
+ "[PAD]": 83,
11
+ "[UNK]": 82,
12
+ "o": 71,
13
+ "|": 23,
14
+ "ം": 51,
15
+ "അ": 54,
16
+ "ആ": 14,
17
+ "ഇ": 39,
18
+ "ഈ": 41,
19
+ "ഉ": 62,
20
+ "ഊ": 16,
21
+ "എ": 69,
22
+ "ഏ": 46,
23
+ "ഐ": 79,
24
+ "ഒ": 30,
25
+ "ഓ": 56,
26
+ "ഔ": 67,
27
+ "ക": 13,
28
+ "ഖ": 77,
29
+ "ഗ": 47,
30
+ "ഘ": 61,
31
+ "ങ": 68,
32
+ "ച": 7,
33
+ "ഛ": 26,
34
+ "ജ": 81,
35
+ "ഞ": 28,
36
+ "ട": 20,
37
+ "ഠ": 74,
38
+ "ഡ": 38,
39
+ "ഢ": 15,
40
+ "ണ": 6,
41
+ "ത": 21,
42
+ "ഥ": 17,
43
+ "ദ": 35,
44
+ "ധ": 37,
45
+ "ന": 70,
46
+ "പ": 55,
47
+ "ഫ": 4,
48
+ "ബ": 78,
49
+ "ഭ": 3,
50
+ "മ": 32,
51
+ "യ": 49,
52
+ "ര": 80,
53
+ "റ": 58,
54
+ "ല": 42,
55
+ "ള": 72,
56
+ "ഴ": 25,
57
+ "വ": 8,
58
+ "ശ": 65,
59
+ "ഷ": 40,
60
+ "സ": 0,
61
+ "ഹ": 48,
62
+ "ാ": 33,
63
+ "ി": 75,
64
+ "ീ": 73,
65
+ "ു": 9,
66
+ "ൂ": 10,
67
+ "ൃ": 5,
68
+ "െ": 43,
69
+ "േ": 52,
70
+ "ൈ": 24,
71
+ "ൊ": 53,
72
+ "ോ": 11,
73
+ "ൌ": 60,
74
+ "്": 45,
75
+ "ൗ": 34,
76
+ "ൺ": 57,
77
+ "ൻ": 64,
78
+ "ർ": 2,
79
+ "ൽ": 44,
80
+ "ൾ": 29,
81
+ "‌": 22,
82
+ "‍": 36,
83
+ "“": 66,
84
+ "”": 31,
85
+ "": 59
86
+ }