{
"_name_or_path": "facebook/hubert-base-ls960",
"activation_dropout": 0.1,
"apply_spec_augment": true,
"architectures": [
"HubertForSpeechClassification"
],
"attention_dropout": 0.1,
"bos_token_id": 1,
"classifier_proj_size": 256,
"conv_bias": false,
"conv_dim": [
512,
512,
512,
512,
512,
512,
512
],
"conv_kernel": [
10,
3,
3,
3,
3,
2,
2
],
"conv_stride": [
5,
2,
2,
2,
2,
2,
2
],
"ctc_loss_reduction": "sum",
"ctc_zero_infinity": false,
"do_stable_layer_norm": false,
"eos_token_id": 2,
"feat_extract_activation": "gelu",
"feat_extract_dropout": 0.0,
"feat_extract_norm": "group",
"feat_proj_dropout": 0.1,
"feat_proj_layer_norm": true,
"final_dropout": 0.1,
"finetuning_task": "wav2vec2_clf",
"gradient_checkpointing": false,
"hidden_act": "gelu",
"hidden_dropout": 0.1,
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"id2label": {
"0": "01Neutral",
"1": "02Fearful",
"2": "03Happy",
"3": "04Sad",
"4": "05Angry"
},
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"01Neutral": 0,
"02Fearful": 1,
"03Happy": 2,
"04Sad": 3,
"05Angry": 4
},
"layer_norm_eps": 1e-05,
"layerdrop": 0.1,
"mask_feature_length": 10,
"mask_feature_min_masks": 0,
"mask_feature_prob": 0.0,
"mask_time_length": 10,
"mask_time_min_masks": 2,
"mask_time_prob": 0.05,
"model_type": "hubert",
"num_attention_heads": 12,
"num_conv_pos_embedding_groups": 16,
"num_conv_pos_embeddings": 128,
"num_feat_extract_layers": 7,
"num_hidden_layers": 12,
"pad_token_id": 0,
"pooling_mode": "mean",
"problem_type": "single_label_classification",
"tokenizer_class": "Wav2Vec2CTCTokenizer",
"torch_dtype": "float32",
"transformers_version": "4.39.3",
"use_weighted_layer_sum": false,
"vocab_size": 32
}