"""Gradio demo that transcribes a fixed sample clip with Whisper.

The app takes no user input: each request transcribes the first item of the
``hf-internal-testing/librispeech_asr_dummy`` validation split and shows the
resulting text.
"""

import functools

import gradio as gr
import torch
from datasets import load_dataset
from transformers import pipeline

# Run on the first CUDA device when one is available, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"


@functools.lru_cache(maxsize=1)
def _get_pipeline():
    """Build the speech-recognition pipeline once and reuse it afterwards."""
    # Memoized so that repeated requests do not reload the model each time.
    return pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small",
        chunk_length_s=30,
        device=device,
    )


@functools.lru_cache(maxsize=1)
def _get_dataset():
    """Load the sample dataset once and reuse it afterwards."""
    return load_dataset(
        "hf-internal-testing/librispeech_asr_dummy",
        "clean",
        split="validation",
    )


def convert_audio():
    """Transcribe the predefined audio sample and return the text.

    Returns:
        The ``"text"`` entry of the pipeline's prediction for the first
        dataset item.
    """
    sample = _get_dataset()[0]["audio"]
    print("Using predefined audio sample:")

    # NOTE(review): only the raw array is passed on, not the sample's
    # sampling rate; presumably the clip already matches what the model
    # expects -- confirm before swapping in a different dataset.
    audio_data = sample["array"]

    prediction = _get_pipeline()(audio_data)["text"]
    print(prediction)
    return prediction


# inputs=None: the function takes no arguments, so the UI has no input widget.
demo = gr.Interface(
    fn=convert_audio,
    inputs=None,
    outputs="text",
)

# NOTE(review): share=True requests a public share link for the app; confirm
# that exposing this demo outside the local machine is intended.
demo.launch(share=True)