{ "add_bos_token": false, "add_prefix_space": false, "added_tokens_decoder": { "4197": { "content": "<|endoftext|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "4198": { "content": "<|startoftranscript|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "4199": { "content": "<|en|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "4200": { "content": "<|ru|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "4201": { "content": "<|translate|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "4202": { "content": "<|transcribe|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "4203": { "content": "<|startoflm|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "4204": { "content": "<|startofprev|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "4205": { "content": "<|nocaptions|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "4206": { "content": "<|notimestamps|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true } }, "additional_special_tokens": [ "<|endoftext|>", "<|startoftranscript|>", "<|en|>", "<|zh|>", "<|de|>", "<|es|>", "<|ru|>", "<|ko|>", "<|fr|>", "<|ja|>", "<|pt|>", "<|tr|>", "<|pl|>", "<|ca|>", "<|nl|>", "<|ar|>", "<|sv|>", "<|it|>", "<|id|>", "<|hi|>", "<|fi|>", "<|vi|>", "<|he|>", "<|uk|>", "<|el|>", "<|ms|>", "<|cs|>", "<|ro|>", "<|da|>", "<|hu|>", "<|ta|>", "<|no|>", "<|th|>", "<|ur|>", "<|hr|>", "<|bg|>", "<|lt|>", "<|la|>", "<|mi|>", "<|ml|>", "<|cy|>", "<|sk|>", "<|te|>", "<|fa|>", "<|lv|>", "<|bn|>", "<|sr|>", "<|az|>", "<|sl|>", "<|kn|>", "<|et|>", "<|mk|>", "<|br|>", "<|eu|>", "<|is|>", "<|hy|>", "<|ne|>", "<|mn|>", "<|bs|>", "<|kk|>", "<|sq|>", "<|sw|>", "<|gl|>", "<|mr|>", "<|pa|>", "<|si|>", "<|km|>", "<|sn|>", "<|yo|>", "<|so|>", "<|af|>", "<|oc|>", "<|ka|>", "<|be|>", "<|tg|>", "<|sd|>", "<|gu|>", "<|am|>", "<|yi|>", "<|lo|>", "<|uz|>", "<|fo|>", "<|ht|>", "<|ps|>", "<|tk|>", "<|nn|>", "<|mt|>", "<|sa|>", "<|lb|>", "<|my|>", "<|bo|>", "<|tl|>", "<|mg|>", "<|as|>", "<|tt|>", "<|haw|>", "<|ln|>", "<|ha|>", "<|ba|>", "<|jw|>", "<|su|>", "<|translate|>", "<|transcribe|>", "<|startoflm|>", "<|startofprev|>", "<|nocaptions|>", "<|notimestamps|>" ], "bos_token": "<|endoftext|>", "clean_up_tokenization_spaces": true, "eos_token": "<|endoftext|>", "errors": "replace", "model_max_length": 1024, "pad_token": "<|endoftext|>", "processor_class": "WhisperProcessor", "return_attention_mask": false, "tokenizer_class": "WhisperTokenizer", "unk_token": "<|endoftext|>" }