import gradio as gr import torch from pathlib import Path from pytube import YouTube from pydub import AudioSegment from TTS.api import TTS import uuid uid = uuid.uuid4() device = "cuda" if torch.cuda.is_available() else "cpu" def custom_bark(inp, in_aud=None, trim_aud=None, in_aud_mic=None): if in_aud_mic != None: speaker_wav=in_aud_mic if in_aud !=None and trim_aud==None: speaker_wav=Path(f"{uid}-tmp_aud.mp4") if trim_aud != None: speaker_wav=Path(f"{uid}-trim.wav") tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to(device) tts.tts_to_file(inp, speaker_wav=speaker_wav, language="en", file_path=f"{uid}-output.wav") return (f"{uid}-output.wav") def load_video_yt(vid): yt = YouTube(vid) vid = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download(filename=f"{uid}-tmp.mp4") vid_aud = yt.streams.filter(only_audio=True)[0].download(filename=f"{uid}-tmp_aud.mp4") print (f'Video Length: {yt.length}') return vid, vid_aud, f"{uid}-tmp_aud.mp4" def trim_clip(clip, start_t, end_t): clip = Path(f"{uid}-tmp_aud.mp4") song = AudioSegment.from_file(f"{uid}-tmp_aud.mp4", format="mp4") start_min = int(start_t.split(":",1)[0]) start_sec = int(start_t.split(":",1)[1]) end_min = int(end_t.split(":",1)[0]) end_sec = int(end_t.split(":",1)[1]) start = ((start_min*60)+start_sec)*1000 end = ((end_min*60)+end_sec)*1000 song_clip = song[start: end] song_clip.export(f"{uid}-trim.wav", format="wav") print("New Audio file is created and saved") return f"{uid}-trim.wav" with gr.Blocks() as app: with gr.Box(): with gr.Row(): in_text = gr.Textbox(lines = 6, max_lines = 20) with gr.Column(): alt_go_btn = gr.Button() out_audio = gr.Audio(interactive=False) with gr.Box(): with gr.Row(): gr.Markdown('''