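# whisper-small-fa / model_config.yaml
# Hosting-page header captured along with the file (not part of the config):
#   uploader avatar "arxyzan's picture"; commit message "Update model_config.yaml";
#   commit 5886763 (verified); page links "raw" / "history" / "blame"; size 1.45 kB.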
# Top-level model settings for the `whisper_speech_recognition` model.
# Keys are grouped by theme; every key and value is unchanged.

# Identity
name: whisper_speech_recognition
config_type: model

# Input features, vocabulary and sequence limits
vocab_size: 51865
num_mel_bins: 80
sampling_rate: 16000
max_source_positions: 1500
max_target_positions: 448

# Encoder/decoder dimensions
is_encoder_decoder: true
d_model: 768
num_hidden_layers: 12
encoder_layers: 12
encoder_attention_heads: 12
encoder_ffn_dim: 3072
decoder_layers: 12
decoder_attention_heads: 12
decoder_ffn_dim: 3072
activation_function: gelu
scale_embedding: false
init_std: 0.02
torch_dtype: float32

# Dropout and LayerDrop rates (all disabled)
dropout: 0.0
attention_dropout: 0.0
activation_dropout: 0.0
encoder_layerdrop: 0.0
decoder_layerdrop: 0.0

# Special token ids and suppression lists
pad_token_id: 50257
bos_token_id: 50257
eos_token_id: 50257
decoder_start_token_id: 50258
suppress_tokens: []
begin_suppress_tokens: [220, 50257]

# Classification-head options
use_weighted_layer_sum: false
classifier_proj_size: 256

# Time/feature masking options (switched off via apply_spec_augment)
apply_spec_augment: false
mask_time_prob: 0.05
mask_time_length: 10
mask_time_min_masks: 2
mask_feature_prob: 0.0
mask_feature_length: 10
mask_feature_min_masks: 0

# Runtime / decoding limits
use_cache: false
max_new_tokens: 444
# Generation (decoding) settings. Everything below is nested under
# `generation_config` so it does not collide with the same-named top-level
# keys (bos_token_id, eos_token_id, pad_token_id, max_new_tokens, ...).
generation_config:
  alignment_heads: null
  # NOTE(review): the top-level begin_suppress_tokens lists 50257 (the
  # eos/pad id used in this file) where this list has 50256 -- confirm which
  # id is intended here.
  begin_suppress_tokens: [220, 50256]
  bos_token_id: 50257
  decoder_start_token_id: 50258
  eos_token_id: 50257
  # Pairs of [position, token id]. Position 1 is left unset with YAML `null`
  # (the Python spelling `None` would be read as the string "None").
  # 50359 at position 2 is the `transcribe` id from task_to_id below.
  forced_decoder_ids: [[1, null], [2, 50359]]
  is_multilingual: true
  max_initial_timestamp_index: 50
  max_length: 448
  max_new_tokens: 444
  no_timestamps_token_id: 50363
  pad_token_id: 50257
  prev_sot_token_id: 50361
  return_timestamps: false
  suppress_tokens: null
  # Task name -> task token id.
  task_to_id:
    transcribe: 50359
    translate: 50358