sanchit-gandhi HF staff committed on
Commit
6139d87
1 Parent(s): a7a63ec

Upload config

Browse files
Files changed (1) hide show
  1. config.json +213 -206
config.json CHANGED
@@ -1,7 +1,8 @@
1
  {
2
- "_name_or_path": "openai/whisper-medium",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
 
5
  "architectures": [
6
  "WhisperForAudioClassification"
7
  ],
@@ -40,215 +41,221 @@
40
  ]
41
  ],
42
  "id2label": {
43
- "0": "af_za",
44
- "1": "am_et",
45
- "10": "ca_es",
46
- "100": "yue_hant_hk",
47
- "101": "zu_za",
48
- "11": "ceb_ph",
49
- "12": "ckb_iq",
50
- "13": "cmn_hans_cn",
51
- "14": "cs_cz",
52
- "15": "cy_gb",
53
- "16": "da_dk",
54
- "17": "de_de",
55
- "18": "el_gr",
56
- "19": "en_us",
57
- "2": "ar_eg",
58
- "20": "es_419",
59
- "21": "et_ee",
60
- "22": "fa_ir",
61
- "23": "ff_sn",
62
- "24": "fi_fi",
63
- "25": "fil_ph",
64
- "26": "fr_fr",
65
- "27": "ga_ie",
66
- "28": "gl_es",
67
- "29": "gu_in",
68
- "3": "as_in",
69
- "30": "ha_ng",
70
- "31": "he_il",
71
- "32": "hi_in",
72
- "33": "hr_hr",
73
- "34": "hu_hu",
74
- "35": "hy_am",
75
- "36": "id_id",
76
- "37": "ig_ng",
77
- "38": "is_is",
78
- "39": "it_it",
79
- "4": "ast_es",
80
- "40": "ja_jp",
81
- "41": "jv_id",
82
- "42": "ka_ge",
83
- "43": "kam_ke",
84
- "44": "kea_cv",
85
- "45": "kk_kz",
86
- "46": "km_kh",
87
- "47": "kn_in",
88
- "48": "ko_kr",
89
- "49": "ky_kg",
90
- "5": "az_az",
91
- "50": "lb_lu",
92
- "51": "lg_ug",
93
- "52": "ln_cd",
94
- "53": "lo_la",
95
- "54": "lt_lt",
96
- "55": "luo_ke",
97
- "56": "lv_lv",
98
- "57": "mi_nz",
99
- "58": "mk_mk",
100
- "59": "ml_in",
101
- "6": "be_by",
102
- "60": "mn_mn",
103
- "61": "mr_in",
104
- "62": "ms_my",
105
- "63": "mt_mt",
106
- "64": "my_mm",
107
- "65": "nb_no",
108
- "66": "ne_np",
109
- "67": "nl_nl",
110
- "68": "nso_za",
111
- "69": "ny_mw",
112
- "7": "bg_bg",
113
- "70": "oc_fr",
114
- "71": "om_et",
115
- "72": "or_in",
116
- "73": "pa_in",
117
- "74": "pl_pl",
118
- "75": "ps_af",
119
- "76": "pt_br",
120
- "77": "ro_ro",
121
- "78": "ru_ru",
122
- "79": "sd_in",
123
- "8": "bn_in",
124
- "80": "sk_sk",
125
- "81": "sl_si",
126
- "82": "sn_zw",
127
- "83": "so_so",
128
- "84": "sr_rs",
129
- "85": "sv_se",
130
- "86": "sw_ke",
131
- "87": "ta_in",
132
- "88": "te_in",
133
- "89": "tg_tj",
134
- "9": "bs_ba",
135
- "90": "th_th",
136
- "91": "tr_tr",
137
- "92": "uk_ua",
138
- "93": "umb_ao",
139
- "94": "ur_pk",
140
- "95": "uz_uz",
141
- "96": "vi_vn",
142
- "97": "wo_sn",
143
- "98": "xh_za",
144
- "99": "yo_ng"
145
  },
146
  "init_std": 0.02,
147
  "is_encoder_decoder": true,
148
  "label2id": {
149
- "af_za": "0",
150
- "am_et": "1",
151
- "ar_eg": "2",
152
- "as_in": "3",
153
- "ast_es": "4",
154
- "az_az": "5",
155
- "be_by": "6",
156
- "bg_bg": "7",
157
- "bn_in": "8",
158
- "bs_ba": "9",
159
- "ca_es": "10",
160
- "ceb_ph": "11",
161
- "ckb_iq": "12",
162
- "cmn_hans_cn": "13",
163
- "cs_cz": "14",
164
- "cy_gb": "15",
165
- "da_dk": "16",
166
- "de_de": "17",
167
- "el_gr": "18",
168
- "en_us": "19",
169
- "es_419": "20",
170
- "et_ee": "21",
171
- "fa_ir": "22",
172
- "ff_sn": "23",
173
- "fi_fi": "24",
174
- "fil_ph": "25",
175
- "fr_fr": "26",
176
- "ga_ie": "27",
177
- "gl_es": "28",
178
- "gu_in": "29",
179
- "ha_ng": "30",
180
- "he_il": "31",
181
- "hi_in": "32",
182
- "hr_hr": "33",
183
- "hu_hu": "34",
184
- "hy_am": "35",
185
- "id_id": "36",
186
- "ig_ng": "37",
187
- "is_is": "38",
188
- "it_it": "39",
189
- "ja_jp": "40",
190
- "jv_id": "41",
191
- "ka_ge": "42",
192
- "kam_ke": "43",
193
- "kea_cv": "44",
194
- "kk_kz": "45",
195
- "km_kh": "46",
196
- "kn_in": "47",
197
- "ko_kr": "48",
198
- "ky_kg": "49",
199
- "lb_lu": "50",
200
- "lg_ug": "51",
201
- "ln_cd": "52",
202
- "lo_la": "53",
203
- "lt_lt": "54",
204
- "luo_ke": "55",
205
- "lv_lv": "56",
206
- "mi_nz": "57",
207
- "mk_mk": "58",
208
- "ml_in": "59",
209
- "mn_mn": "60",
210
- "mr_in": "61",
211
- "ms_my": "62",
212
- "mt_mt": "63",
213
- "my_mm": "64",
214
- "nb_no": "65",
215
- "ne_np": "66",
216
- "nl_nl": "67",
217
- "nso_za": "68",
218
- "ny_mw": "69",
219
- "oc_fr": "70",
220
- "om_et": "71",
221
- "or_in": "72",
222
- "pa_in": "73",
223
- "pl_pl": "74",
224
- "ps_af": "75",
225
- "pt_br": "76",
226
- "ro_ro": "77",
227
- "ru_ru": "78",
228
- "sd_in": "79",
229
- "sk_sk": "80",
230
- "sl_si": "81",
231
- "sn_zw": "82",
232
- "so_so": "83",
233
- "sr_rs": "84",
234
- "sv_se": "85",
235
- "sw_ke": "86",
236
- "ta_in": "87",
237
- "te_in": "88",
238
- "tg_tj": "89",
239
- "th_th": "90",
240
- "tr_tr": "91",
241
- "uk_ua": "92",
242
- "umb_ao": "93",
243
- "ur_pk": "94",
244
- "uz_uz": "95",
245
- "vi_vn": "96",
246
- "wo_sn": "97",
247
- "xh_za": "98",
248
- "yo_ng": "99",
249
- "yue_hant_hk": "100",
250
- "zu_za": "101"
251
  },
 
 
 
 
 
 
252
  "max_length": 448,
253
  "max_source_positions": 1500,
254
  "max_target_positions": 448,
@@ -346,7 +353,7 @@
346
  50362
347
  ],
348
  "torch_dtype": "float16",
349
- "transformers_version": "4.27.0.dev0",
350
  "use_cache": true,
351
  "use_weighted_layer_sum": false,
352
  "vocab_size": 51865
 
1
  {
2
+ "_name_or_path": "sanchit-gandhi/whisper-medium-fleurs-lang-id",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
  "architectures": [
7
  "WhisperForAudioClassification"
8
  ],
 
41
  ]
42
  ],
43
  "id2label": {
44
+ "0": "Afrikaans",
45
+ "1": "Amharic",
46
+ "2": "Arabic",
47
+ "3": "Assamese",
48
+ "4": "Asturian",
49
+ "5": "Azerbaijani",
50
+ "6": "Belarusian",
51
+ "7": "Bulgarian",
52
+ "8": "Bengali",
53
+ "9": "Bosnian",
54
+ "10": "Catalan",
55
+ "11": "Cebuano",
56
+ "12": "Sorani-Kurdish",
57
+ "13": "Mandarin Chinese",
58
+ "14": "Czech",
59
+ "15": "Welsh",
60
+ "16": "Danish",
61
+ "17": "German",
62
+ "18": "Greek",
63
+ "19": "English",
64
+ "20": "Spanish",
65
+ "21": "Estonian",
66
+ "22": "Persian",
67
+ "23": "Fula",
68
+ "24": "Finnish",
69
+ "25": "Filipino",
70
+ "26": "French",
71
+ "27": "Irish",
72
+ "28": "Galician",
73
+ "29": "Gujarati",
74
+ "30": "Hausa",
75
+ "31": "Hebrew",
76
+ "32": "Hindi",
77
+ "33": "Croatian",
78
+ "34": "Hungarian",
79
+ "35": "Armenian",
80
+ "36": "Indonesian",
81
+ "37": "Igbo",
82
+ "38": "Icelandic",
83
+ "39": "Italian",
84
+ "40": "Japanese",
85
+ "41": "Javanese",
86
+ "42": "Georgian",
87
+ "43": "Kamba",
88
+ "44": "Kabuverdianu",
89
+ "45": "Kazakh",
90
+ "46": "Khmer",
91
+ "47": "Kannada",
92
+ "48": "Korean",
93
+ "49": "Kyrgyz",
94
+ "50": "Luxembourgish",
95
+ "51": "Ganda",
96
+ "52": "Lingala",
97
+ "53": "Lao",
98
+ "54": "Lithuanian",
99
+ "55": "Luo",
100
+ "56": "Latvian",
101
+ "57": "Maori",
102
+ "58": "Macedonian",
103
+ "59": "Malayalam",
104
+ "60": "Mongolian",
105
+ "61": "Marathi",
106
+ "62": "Malay",
107
+ "63": "Maltese",
108
+ "64": "Burmese",
109
+ "65": "Norwegian",
110
+ "66": "Nepali",
111
+ "67": "Dutch",
112
+ "68": "Northern-Sotho",
113
+ "69": "Nyanja",
114
+ "70": "Occitan",
115
+ "71": "Oromo",
116
+ "72": "Oriya",
117
+ "73": "Punjabi",
118
+ "74": "Polish",
119
+ "75": "Pashto",
120
+ "76": "Portuguese",
121
+ "77": "Romanian",
122
+ "78": "Russian",
123
+ "79": "Sindhi",
124
+ "80": "Slovak",
125
+ "81": "Slovenian",
126
+ "82": "Shona",
127
+ "83": "Somali",
128
+ "84": "Serbian",
129
+ "85": "Swedish",
130
+ "86": "Swahili",
131
+ "87": "Tamil",
132
+ "88": "Telugu",
133
+ "89": "Tajik",
134
+ "90": "Thai",
135
+ "91": "Turkish",
136
+ "92": "Ukrainian",
137
+ "93": "Umbundu",
138
+ "94": "Urdu",
139
+ "95": "Uzbek",
140
+ "96": "Vietnamese",
141
+ "97": "Wolof",
142
+ "98": "Xhosa",
143
+ "99": "Yoruba",
144
+ "100": "Cantonese Chinese",
145
+ "101": "Zulu"
146
  },
147
  "init_std": 0.02,
148
  "is_encoder_decoder": true,
149
  "label2id": {
150
+ "Afrikaans": "0",
151
+ "Amharic": "1",
152
+ "Arabic": "2",
153
+ "Armenian": "35",
154
+ "Assamese": "3",
155
+ "Asturian": "4",
156
+ "Azerbaijani": "5",
157
+ "Belarusian": "6",
158
+ "Bengali": "8",
159
+ "Bosnian": "9",
160
+ "Bulgarian": "7",
161
+ "Burmese": "64",
162
+ "Cantonese Chinese": "100",
163
+ "Catalan": "10",
164
+ "Cebuano": "11",
165
+ "Croatian": "33",
166
+ "Czech": "14",
167
+ "Danish": "16",
168
+ "Dutch": "67",
169
+ "English": "19",
170
+ "Estonian": "21",
171
+ "Filipino": "25",
172
+ "Finnish": "24",
173
+ "French": "26",
174
+ "Fula": "23",
175
+ "Galician": "28",
176
+ "Ganda": "51",
177
+ "Georgian": "42",
178
+ "German": "17",
179
+ "Greek": "18",
180
+ "Gujarati": "29",
181
+ "Hausa": "30",
182
+ "Hebrew": "31",
183
+ "Hindi": "32",
184
+ "Hungarian": "34",
185
+ "Icelandic": "38",
186
+ "Igbo": "37",
187
+ "Indonesian": "36",
188
+ "Irish": "27",
189
+ "Italian": "39",
190
+ "Japanese": "40",
191
+ "Javanese": "41",
192
+ "Kabuverdianu": "44",
193
+ "Kamba": "43",
194
+ "Kannada": "47",
195
+ "Kazakh": "45",
196
+ "Khmer": "46",
197
+ "Korean": "48",
198
+ "Kyrgyz": "49",
199
+ "Lao": "53",
200
+ "Latvian": "56",
201
+ "Lingala": "52",
202
+ "Lithuanian": "54",
203
+ "Luo": "55",
204
+ "Luxembourgish": "50",
205
+ "Macedonian": "58",
206
+ "Malay": "62",
207
+ "Malayalam": "59",
208
+ "Maltese": "63",
209
+ "Mandarin Chinese": "13",
210
+ "Maori": "57",
211
+ "Marathi": "61",
212
+ "Mongolian": "60",
213
+ "Nepali": "66",
214
+ "Northern-Sotho": "68",
215
+ "Norwegian": "65",
216
+ "Nyanja": "69",
217
+ "Occitan": "70",
218
+ "Oriya": "72",
219
+ "Oromo": "71",
220
+ "Pashto": "75",
221
+ "Persian": "22",
222
+ "Polish": "74",
223
+ "Portuguese": "76",
224
+ "Punjabi": "73",
225
+ "Romanian": "77",
226
+ "Russian": "78",
227
+ "Serbian": "84",
228
+ "Shona": "82",
229
+ "Sindhi": "79",
230
+ "Slovak": "80",
231
+ "Slovenian": "81",
232
+ "Somali": "83",
233
+ "Sorani-Kurdish": "12",
234
+ "Spanish": "20",
235
+ "Swahili": "86",
236
+ "Swedish": "85",
237
+ "Tajik": "89",
238
+ "Tamil": "87",
239
+ "Telugu": "88",
240
+ "Thai": "90",
241
+ "Turkish": "91",
242
+ "Ukrainian": "92",
243
+ "Umbundu": "93",
244
+ "Urdu": "94",
245
+ "Uzbek": "95",
246
+ "Vietnamese": "96",
247
+ "Welsh": "15",
248
+ "Wolof": "97",
249
+ "Xhosa": "98",
250
+ "Yoruba": "99",
251
+ "Zulu": "101"
252
  },
253
+ "mask_feature_length": 10,
254
+ "mask_feature_min_masks": 0,
255
+ "mask_feature_prob": 0.0,
256
+ "mask_time_length": 10,
257
+ "mask_time_min_masks": 2,
258
+ "mask_time_prob": 0.05,
259
  "max_length": 448,
260
  "max_source_positions": 1500,
261
  "max_target_positions": 448,
 
353
  50362
354
  ],
355
  "torch_dtype": "float16",
356
+ "transformers_version": "4.30.0.dev0",
357
  "use_cache": true,
358
  "use_weighted_layer_sum": false,
359
  "vocab_size": 51865