import gradio as gr
from pytube import YouTube
import whisper
# define function for transcription
def whisper_transcript(model_size, url, audio_file):
    # prefer the YouTube link if one was provided, otherwise use the uploaded clip
    if url:
        link = YouTube(url)
        source = link.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
    else:
        source = audio_file

    # ".en" models are English-only, so the language can be fixed up front
    if model_size.endswith(".en"):
        language = "english"
    else:
        language = None

    # load the selected model and transcribe without timestamps
    loaded_model = whisper.load_model(model_size)
    transcript = loaded_model.transcribe(source, language=language, without_timestamps=True)
    return transcript["text"]
# define Gradio app interface
gradio_ui = gr.Interface(
    fn=whisper_transcript,
    title="Transcribe multi-lingual audio clips with Whisper",
    description="**How to use**: Select a model, paste in a YouTube link or upload an audio clip, then click submit. If your clip is **100% in English, select models ending in ‘.en’**. If the clip is in other languages, or a mix of languages, select models without ‘.en’.",
    article="**Note**: The larger the selected model or the longer the audio clip, the longer it will take to generate the transcript.",
    inputs=[
        gr.Dropdown(
            label="Select Model",
            choices=[
                "tiny.en",
                "base.en",
                "small.en",
                "medium.en",
                "tiny",
                "base",
                "small",
                "medium",
                "large",
            ],
            value="base",
        ),
        gr.Textbox(label="Paste YouTube link here"),
        gr.Audio(label="Upload Audio File", sources=["upload", "microphone"], type="filepath"),
    ],
    outputs=gr.Textbox(label="Whisper Transcript"),
)
gradio_ui.queue().launch()