# Imports
import warnings

import gradio as gr
import whisper
from gtts import gTTS

warnings.filterwarnings("ignore")

# Load the base Whisper model once at startup
model = whisper.load_model("base")


def transcribe(audio):
    language = 'en'

    # Load the uploaded file and pad/trim it to Whisper's 30-second window
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # Compute the log-Mel spectrogram and move it to the model's device
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language from the spectrogram
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode in the detected language (fp16=False so this also runs on CPU)
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    result_text = result.text

    # Translate the (already padded/trimmed) audio to English
    result_tr = model.transcribe(audio, task='translate')

    # Synthesize the English translation as speech with gTTS
    audioobj = gTTS(text=result_tr['text'], lang=language, slow=False)
    audioobj.save("Temp.mp3")

    return [result_text, result_tr['text'], "Temp.mp3"]


output_1 = gr.Textbox(label="Speech to Text")
output_2 = gr.Textbox(label="English Translation Output")
output_3 = gr.Audio(label="English Audio")  # populated by the file path returned from transcribe()

gr.Interface(
    title='OpenAI Whisper ASR and Translation Gradio Web UI',
    fn=transcribe,
    inputs=[
        # gr.inputs.Audio(source="microphone", type="filepath"),
        gr.Audio(source="upload", type="filepath")  # Gradio 3.x API
    ],
    outputs=[
        output_1, output_2, output_3
    ],
    live=True).launch(debug=True)