|
from pprint import pprint
from time import perf_counter, time

from faster_whisper import WhisperModel
|
|
|
# Load the CTranslate2-converted Kotoba Whisper model once, up front,
# so model-loading cost is excluded from the per-file timings below.
model = WhisperModel("kotoba-tech/kotoba-whisper-v1.0-faster")

# Audio files to benchmark (paths relative to the working directory).
test_audio = [
    "kotoba-whisper-eval/audio/long_interview_1.wav",
    "kotoba-whisper-eval/audio/manzai1.wav",
    "kotoba-whisper-eval/audio/manzai2.wav",
    "kotoba-whisper-eval/audio/manzai3.wav",
]

# Maps audio file path -> elapsed transcription time in seconds.
result = {}
for test_audio_file in test_audio:
    # perf_counter is monotonic and high-resolution — the right clock for
    # measuring elapsed intervals (time() can jump with wall-clock changes).
    start = perf_counter()
    segments, info = model.transcribe(
        test_audio_file,
        language="ja",
        chunk_length=15,
        condition_on_previous_text=False,
    )
    # NOTE: `segments` is a lazy generator — the actual transcription work
    # happens while iterating, so this print loop must stay inside the
    # timed region for the measurement to be meaningful.
    for segment in segments:
        print(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
    result[test_audio_file] = perf_counter() - start

pprint(result)
|
|