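"""Gradio demo: transcribe or translate audio with a user-selected Whisper checkpoint using 🤗 Transformers."""
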
import time

import gradio as gr
import spaces
import torch
from transformers import pipeline
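
# Default checkpoint and the batch size used for chunked long-form decoding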
DEFAULT_MODEL_NAME = "distil-whisper/distil-large-v3"
BATCH_SIZE = 8
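# transformers pipelines accept an integer CUDA device index or the string "cpu"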
device = 0 if torch.cuda.is_available() else "cpu"
def load_pipeline(model_name):
    return pipeline(
        task="automatic-speech-recognition",
        model=model_name,
        chunk_length_s=30,
        device=device,
    )
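
# Load the default checkpoint once at startup so the first request does not pay model-load latency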
pipe = load_pipeline(DEFAULT_MODEL_NAME)
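
# On Hugging Face Spaces, the @spaces.GPU decorator requests ZeroGPU hardware for the duration of each call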
@spaces.GPU
def transcribe(inputs, task, model_name):
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    global pipe
    # Rebuild the pipeline only when the requested checkpoint differs from the loaded one
    if model_name != pipe.model.name_or_path:
        pipe = load_pipeline(model_name)
    start_time = time.time()  # Record the start time
    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    end_time = time.time()  # Record the end time
    transcription_time = end_time - start_time  # Calculate the transcription time
    # Create the transcription time output with additional information
    transcription_time_output = (
        f"Transcription Time: {transcription_time:.2f} seconds\n"
        f"Model Used: {model_name}\n"
        f"Device Used: {'GPU' if torch.cuda.is_available() else 'CPU'}"
    )
    return text, transcription_time_output
demo = gr.Blocks()
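
# Two near-identical interfaces, one per input style, are mounted as tabs below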
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["microphone"], type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
        gr.Textbox(
            label="Model Name",
            value=DEFAULT_MODEL_NAME,
            placeholder="Enter the model name",
            # CTranslate2 checkpoints (e.g. Systran/faster-whisper-*) cannot be loaded by
            # transformers.pipeline, so only Transformers-format checkpoints are suggested
            info="Some available models: distil-whisper/distil-large-v3, distil-whisper/distil-medium.en, openai/whisper-tiny, openai/whisper-base, openai/whisper-medium, openai/whisper-large-v3",
        ),
    ],
    outputs=[gr.TextArea(label="Transcription"), gr.TextArea(label="Transcription Info")],
    theme="huggingface",
    title="Whisper Transcription",
    description=(
        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the specified OpenAI Whisper"
        " checkpoint and 🤗 Transformers to transcribe audio files of arbitrary length."
    ),
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["upload"], type="filepath", label="Audio file"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
        gr.Textbox(
            label="Model Name",
            value=DEFAULT_MODEL_NAME,
            placeholder="Enter the model name",
            info="Some available models: openai/whisper-tiny, openai/whisper-base, openai/whisper-medium, openai/whisper-large-v2",
        ),
    ],
    outputs=[gr.TextArea(label="Transcription"), gr.TextArea(label="Transcription Info")],
    theme="huggingface",
    title="Whisper Transcription",
    description=(
        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the specified OpenAI Whisper"
        " checkpoint and 🤗 Transformers to transcribe audio files of arbitrary length."
    ),
    allow_flagging="never",
)
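
# Mount both interfaces as tabs inside the Blocks container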
with demo:
    gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])

demo.launch(share=True)