Gastron/lp-initial-aed-short

Automatic Speech Recognition

Model card | Files and versions | Community

Aku Rouhe committed on Dec 3, 2021

Commit

f6bcbf4

•

1 Parent(s): d3f34b7

Hyperparams

Files changed (2) hide show

.gitattributes +1 -0
hyperparams.yaml +15 -6

.gitattributes CHANGED Viewed

@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text

hyperparams.yaml CHANGED Viewed

@@ -102,6 +102,11 @@ seq_lin: !new:speechbrain.nnet.linear.Linear
     input_size: !ref <dec_neurons>
     n_neurons: !ref <output_neurons>
 # Final softmax (for log posteriors computation).
 log_softmax: !new:speechbrain.nnet.activations.Softmax
     apply_log: True
@@ -115,10 +120,18 @@ model: !new:torch.nn.ModuleList
       - !ref <ctc_lin>
       - !ref <seq_lin>
 test_search: !new:speechbrain.decoders.S2SRNNBeamSearcher
     embedding: !ref <embedding>
     decoder: !ref <decoder>
     linear: !ref <seq_lin>
     bos_index: !ref <bos_index>
     eos_index: !ref <eos_index>
     blank_index: !ref <blank_index>
@@ -135,14 +148,10 @@ test_search: !new:speechbrain.decoders.S2SRNNBeamSearcher
 # Objects in "modules" dict will have their parameters moved to the correct
 # device, as well as having train()/eval() called on them by the Brain class
 modules:
-    encoder: !ref <encoder>
-    embedding: !ref <embedding>
-    dec: !ref <decoder>
     decoder: !ref <test_search>
-    seq_lin: !ref <seq_lin>
-    normalize: !ref <normalize>
-pretrainer !new:speechbrain.utils.parameter_transfer
     loadables:
         model: !ref <model>
         normalizer: !ref <normalize>

     input_size: !ref <dec_neurons>
     n_neurons: !ref <output_neurons>
+# Linear transformation on the top of the encoder.
+ctc_lin: !new:speechbrain.nnet.linear.Linear
+    input_size: !ref <dnn_neurons>
+    n_neurons: !ref <output_neurons>
 # Final softmax (for log posteriors computation).
 log_softmax: !new:speechbrain.nnet.activations.Softmax
     apply_log: True
       - !ref <ctc_lin>
       - !ref <seq_lin>
+full_encode_step: !new:speechbrain.nnet.containers.LengthsCapableSequential
+    input_shape: [null, null, !ref <n_mels>]
+    compute_features: !ref <compute_features>
+    normalize: !ref <normalize>
+    model: !ref <encoder>
 test_search: !new:speechbrain.decoders.S2SRNNBeamSearcher
     embedding: !ref <embedding>
     decoder: !ref <decoder>
     linear: !ref <seq_lin>
+    ctc_linear: !ref <ctc_lin>
     bos_index: !ref <bos_index>
     eos_index: !ref <eos_index>
     blank_index: !ref <blank_index>
 # Objects in "modules" dict will have their parameters moved to the correct
 # device, as well as having train()/eval() called on them by the Brain class
 modules:
+    encoder: !ref <full_encode_step>
     decoder: !ref <test_search>
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
     loadables:
         model: !ref <model>
         normalizer: !ref <normalize>