patrickvonplaten commited on
Commit
1c2ce0d
1 Parent(s): c553437
config.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "apply_spec_augment": true,
4
+ "architectures": [
5
+ "Wav2Vec2ForCTC"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "bos_token_id": 1,
9
+ "classifier_proj_size": 256,
10
+ "codevector_dim": 768,
11
+ "contrastive_logits_temperature": 0.1,
12
+ "conv_bias": true,
13
+ "conv_dim": [
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512,
20
+ 512
21
+ ],
22
+ "conv_kernel": [
23
+ 10,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 3,
28
+ 2,
29
+ 2
30
+ ],
31
+ "conv_stride": [
32
+ 5,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2,
38
+ 2
39
+ ],
40
+ "ctc_loss_reduction": "sum",
41
+ "ctc_zero_infinity": false,
42
+ "diversity_loss_weight": 0.1,
43
+ "do_stable_layer_norm": true,
44
+ "eos_token_id": 2,
45
+ "feat_extract_activation": "gelu",
46
+ "feat_extract_dropout": 0.0,
47
+ "feat_extract_norm": "layer",
48
+ "feat_proj_dropout": 0.1,
49
+ "feat_quantizer_dropout": 0.0,
50
+ "final_dropout": 0.0,
51
+ "gradient_checkpointing": false,
52
+ "hidden_act": "gelu",
53
+ "hidden_dropout": 0.1,
54
+ "hidden_size": 1024,
55
+ "initializer_range": 0.02,
56
+ "intermediate_size": 4096,
57
+ "layer_norm_eps": 1e-05,
58
+ "layerdrop": 0.1,
59
+ "mask_channel_length": 10,
60
+ "mask_channel_min_space": 1,
61
+ "mask_channel_other": 0.0,
62
+ "mask_channel_prob": 0.0,
63
+ "mask_channel_selection": "static",
64
+ "mask_feature_length": 10,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_space": 1,
68
+ "mask_time_other": 0.0,
69
+ "mask_time_prob": 0.075,
70
+ "mask_time_selection": "static",
71
+ "model_type": "wav2vec2",
72
+ "num_attention_heads": 16,
73
+ "num_codevector_groups": 2,
74
+ "num_codevectors_per_group": 320,
75
+ "num_conv_pos_embedding_groups": 16,
76
+ "num_conv_pos_embeddings": 128,
77
+ "num_feat_extract_layers": 7,
78
+ "num_hidden_layers": 24,
79
+ "num_negatives": 100,
80
+ "pad_token_id": 0,
81
+ "proj_codevector_dim": 768,
82
+ "torch_dtype": "float32",
83
+ "transformers_version": "4.13.0.dev0",
84
+ "use_weighted_layer_sum": false,
85
+ "vocab_size": 272
86
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84d491ba3ff62738e2cf3e0094375cff9e6c74b26f9038cde140625a45ed60cd
3
+ size 1263043159
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 1, "<pad>": 0, "</s>": 2, "<unk>": 3, "n": 4, "t": 5, "s": 6, "a": 7, "ɪ": 8, "l": 9, "ə": 10, "d": 11, "ɛ": 12, "e": 13, "k": 14, "i": 15, "m": 16, "o": 17, "p": 18, "z": 19, "ʁ": 20, "b": 21, "v": 22, "f": 23, "j": 24, "r": 25, "ɻ": 26, "u": 27, "w": 28, "ɔ": 29, "æ": 30, "ʊ": 31, "ɒ": 32, "iː": 33, "ɾ": 34, "ʃ": 35, "h": 36, "θ": 37, "eɪ": 38, "ŋ": 39, "y": 40, "ð": 41, "R": 42, "ɡ": 43, "ɜ": 44, "g": 45, "uː": 46, "ɔː": 47, "aɪ": 48, "ʌ": 49, "x": 50, "aː": 51, "əʊ": 52, "ɑː": 53, "ɜː": 54, "n̩": 55, "ɑ": 56, "eː": 57, "ɲ": 58, "ʒ": 59, "dʒ": 60, "l̩": 61, "ɑ̃": 62, "ai": 63, "ts": 64, "β": 65, "œ": 66, "oː": 67, "aʊ": 68, "ɔ̃": 69, "ʏ": 70, "ɪə": 71, "ɛə": 72, "ɥ": 73, "ʂ": 74, "ɕ": 75, "au": 76, "ɨ": 77, "c": 78, "ɯ": 79, "ɫ": 80, "ʔ": 81, "’": 82, "ɛ̃": 83, "ʈʂ": 84, "tɕ": 85, "ø": 86, "lʲ": 87, "ʎ": 88, "ʊə": 89, "tʲ": 90, "rʲ": 91, "ɔɪ": 92, "nʲ": 93, "sʲ": 94, "tʰ": 95, "ʕ": 96, "tɕʰ": 97, "ʈʂʰ": 98, "ɣ": 99, "ɔy": 100, "dʲ": 101, "kʰ": 102, "q": 103, "ʐ": 104, "ɹ": 105, "ɕʲ": 106, "vʲ": 107, "ɝ": 108, "mʲ": 109, "ħ": 110, "tʃ": 111, "iʲ": 112, "tsʰ": 113, "ʐʲ": 114, "ɤ": 115, "ɛː": 116, "ɟ": 117, "pʰ": 118, "ɦ": 119, "ʋ": 120, "ʈ": 121, "pʲ": 122, "bʲ": 123, "r̝": 124, "ɚ": 125, "aʲ": 126, "sˤ": 127, "oʲ": 128, "ɐ̃": 129, "tˤ": 130, "ʂʲ": 131, "ʉ": 132, "ɭ": 133, "øː": 134, "yː": 135, "pf": 136, "dˤ": 137, "ʲ": 138, "ɨʲ": 139, "ɱ": 140, "eʲ": 141, "ç": 142, "ʉː": 143, "ɳ": 144, "ɽ": 145, "ù": 146, "ɑ̃ː": 147, "æʲ": 148, "kʲ": 149, "ɡʲ": 150, "ðˤ": 151, "fʲ": 152, "yʲ": 153, "ɧ": 154, "ː": 155, "ɡj": 156, "zʲ": 157, "á": 158, "n̪": 159, "ɔ̃ː": 160, "ɑː̌": 161, "í": 162, "ɵ": 163, "ü": 164, "â": 165, "ö": 166, "ó": 167, "ɒ̃": 168, "ʃʲ": 169, "ř": 170, "œ̃": 171, "ō": 172, "t̪": 173, "ɪ̌": 174, "ë": 175, "iː̂": 176, "oː̂": 177, "uː̌": 178, "ʒʲ": 179, "ɪ̂": 180, "õ": 181, "ẽ": 182, "oi": 183, "tʃʲ": 184, "æː̌": 185, "d̪": 186, "ě": 187, "eː̂": 188, "oː̌": 189, "̇": 190, "iː̌": 191, "tsʲ": 192, "ʊ̂": 193, "ň": 194, "ã": 195, "õ": 196, "ɛ̂": 197, "æː": 198, "ʊ̌": 199, "bʰ": 200, "ʝ": 201, "t̠ʃ": 202, "æe̯": 203, "uː̂": 204, "∅": 205, "ɔ̌": 206, "ʊ̯": 207, "dʰ": 208, "xʲ": 209, "eː̌": 210, "dʒʲ": 211, "mː": 212, "ɨː": 213, "nʲ̌": 214, "rʲ̌": 215, "ɔ̂": 216, "bʱ": 217, "ɡʰ": 218, "â": 219, "ʰ": 220, "ʃʰ": 221, "æ̃": 222, "ɫ̌": 223, "æ̃ː": 224, "ʊ̃": 225, "ʒʱ": 226, "ã": 227, "ɖ": 228, "lʲ̌": 229, "zː": 230, "ʱ": 231, "lː": 232, "nː": 233, "ou": 234, "lʰ": 235, "ɑː̂": 236, "ʈʰ": 237, "gʱ": 238, "dː": 239, "ɣʲ": 240, "jʲ": 241, "m̌": 242, "mʲ̌": 243, "dz": 244, "ũ": 245, "rː": 246, "oi̯": 247, "m̩": 248, "ʊi": 249, "2": 250, "ɽʱ": 251, "ɟː": 252, ":": 253, "tsː": 254, "jː": 255, "ǐ": 256, "ʌi": 257, "۷": 258, "3": 259, "tː": 260, "ʃː": 261, "æː̂": 262, "ŋ̩": 263, "ɖʱ": 264, "gʰ": 265, "4": 266, "hː": 267, "dzː": 268, "ɲː": 269, "tʃː": 270, "ʉʲ": 271}