{ "architectures": [ "Effb2TrmCaptioningModel" ], "attn_emb_dim": 1408, "decoder_dropout": 0.2, "decoder_emb_dim": 256, "decoder_n_layers": 2, "decoder_we_tie_weights": true, "fc_emb_dim": 1408, "sample_rate": 16000, "shared_dim": 1024, "tchr_dim": 768, "torch_dtype": "float32", "transformers_version": "4.30.2", "vocab_size": 4981 }