# Benchmark script: transcribe a fixed list of Japanese audio files with the
# kotoba-whisper faster-whisper model and report per-file elapsed seconds.
from pprint import pprint
from time import perf_counter, time  # `time` stays importable; timing uses perf_counter

from faster_whisper import WhisperModel

# Loaded once, outside the timed region, so model start-up cost is not
# attributed to the first audio file.
model = WhisperModel("kotoba-tech/kotoba-whisper-v1.0-faster")

test_audio = [
    "kotoba-whisper-eval/audio/long_interview_1.wav",
    "kotoba-whisper-eval/audio/manzai1.wav",
    "kotoba-whisper-eval/audio/manzai2.wav",
    "kotoba-whisper-eval/audio/manzai3.wav",
]

# Maps each audio path to the seconds spent transcribing it.
result = {}
for test_audio_file in test_audio:
    # perf_counter() is monotonic and high-resolution; wall-clock time() can
    # jump when the system clock is adjusted and would skew the measurement.
    start = perf_counter()
    segments, info = model.transcribe(
        test_audio_file,
        language="ja",
        chunk_length=15,
        condition_on_previous_text=False,
    )
    # faster-whisper documents `segments` as a lazy generator: decoding runs
    # while it is iterated, so the clock must stop only after this loop.
    for segment in segments:
        print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
    elapsed = perf_counter() - start
    result[test_audio_file] = elapsed

pprint(result)