File size: 1,445 Bytes
69cc94e
 
 
 
 
 
 
 
 
 
 
 
 
 
5886763
7745bd6
69cc94e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97f4abd
4a6b087
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
---
# Model configuration for `whisper_speech_recognition`.
# Keys are grouped by topic for readability; mapping order carries no meaning.

# Identity
name: whisper_speech_recognition
config_type: model
torch_dtype: float32

# Input and overall dimensions
sampling_rate: 16000
num_mel_bins: 80
vocab_size: 51865
d_model: 768
num_hidden_layers: 12
max_source_positions: 1500
max_target_positions: 448

# Encoder stack
encoder_layers: 12
encoder_attention_heads: 12
encoder_ffn_dim: 3072
encoder_layerdrop: 0.0

# Decoder stack
decoder_layers: 12
decoder_attention_heads: 12
decoder_ffn_dim: 3072
decoder_layerdrop: 0.0
decoder_start_token_id: 50258

# Architecture switches and initialisation
is_encoder_decoder: true
use_cache: false
scale_embedding: false
activation_function: gelu
init_std: 0.02

# Dropout rates (all disabled)
dropout: 0.0
attention_dropout: 0.0
activation_dropout: 0.0

# Special token ids; pad, bos and eos all share the value 50257
pad_token_id: 50257
bos_token_id: 50257
eos_token_id: 50257
suppress_tokens: []
begin_suppress_tokens: [220, 50257]

# Classification-head options
use_weighted_layer_sum: false
classifier_proj_size: 256

# Masking options. apply_spec_augment is false here; the mask_* values are
# presumably only consulted when it is true (confirm against the consumer).
apply_spec_augment: false
mask_time_prob: 0.05
mask_time_length: 10
mask_time_min_masks: 2
mask_feature_prob: 0.0
mask_feature_length: 10
mask_feature_min_masks: 0

# Top-level generation cap; the same key with the same value also appears
# inside generation_config.
max_new_tokens: 444
# Generation settings nested under the model configuration.
generation_config:
  alignment_heads: null
  # Second entry equals eos_token_id in this mapping and agrees with the
  # top-level begin_suppress_tokens list.
  # NOTE(review): confirm 50257 is the intended id for this checkpoint.
  begin_suppress_tokens: [220, 50257]
  bos_token_id: 50257
  decoder_start_token_id: 50258
  eos_token_id: 50257
  # Pairs look like [position, token_id] (confirm against the consumer).
  # The unset entry must be spelled `null`: the bare word None is loaded by
  # YAML as the string "None". 50359 is the `transcribe` id in task_to_id.
  forced_decoder_ids: [[1, null], [2, 50359]]
  is_multilingual: true
  max_initial_timestamp_index: 50
  max_length: 448
  max_new_tokens: 444
  no_timestamps_token_id: 50363
  pad_token_id: 50257
  prev_sot_token_id: 50361
  return_timestamps: false
  suppress_tokens: null
  # Task name -> token id.
  task_to_id:
    transcribe: 50359
    translate: 50358