File size: 3,687 Bytes
4052b22 d7dae2d 9ffc810 d7dae2d 75fba1e d7dae2d fb7aeae 8ba3782 ce5d7b2 d7dae2d 237e3b5 5bb4e7f b1a2210 d7dae2d 4052b22 194d529 b1a2210 d10cfe4 29cf60d 4052b22 d7dae2d 64e99e6 d7dae2d 4052b22 64e99e6 060a393 67d6685 d7dae2d 4052b22 d7dae2d 7c0224a 75fba1e 2a4098b d7dae2d 29cf60d d7dae2d fb7aeae d7dae2d ce5d7b2 d7dae2d 237e3b5 ce5d7b2 d7dae2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# EZ-Voice-Clone-From-Long-Text
import gradio as gr
import torch
from pathlib import Path
from pytube import YouTube
from pydub import AudioSegment
from TTS.api import TTS
import uuid
import os
# Sample clip used as the initial value of the audio-source component.
test_audio = "./shufflin.wav"

# Generated once at import time; every temp/output file name in this module
# is prefixed with it.
uid = uuid.uuid4()

# Run the TTS model on the GPU whenever PyTorch reports one is available.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# TTS models already loaded in this process, keyed by device, so the
# checkpoint is loaded once instead of on every synthesis request.
_TTS_MODELS = {}


def _get_tts_model():
    """Return the `your_tts` model on ``device``, loading it on first use."""
    if device not in _TTS_MODELS:
        _TTS_MODELS[device] = TTS(
            model_name="tts_models/multilingual/multi-dataset/your_tts",
            progress_bar=False,
        ).to(device)
    return _TTS_MODELS[device]


def custom_bark(inp, in_aud=None, trim_aud=None, in_aud_mic=None):
    """Synthesize ``inp`` as speech in the voice of a reference recording.

    The reference ("speaker") audio is chosen with this priority:

    1. ``trim_aud`` is set  -> the trimmed clip file ``{uid}-trim.wav``.
       Only the presence of ``trim_aud`` is checked; the clip is read from
       that file, not from the value passed in.
    2. ``in_aud`` is set    -> ``in_aud`` itself.
    3. ``in_aud_mic`` is set -> ``in_aud_mic`` itself.

    Args:
        inp: Text to speak.
        in_aud: Reference audio handed to the TTS engine as ``speaker_wav``.
        trim_aud: Value of the trimmed-audio component, or ``None``.
        in_aud_mic: Alternative reference audio, used only when neither of
            the other two sources is given.

    Returns:
        The name of the generated file, ``{uid}-output.wav``.

    Raises:
        ValueError: If no reference audio source is provided at all.
    """
    # `is not None` instead of `!= None`: identity checks never invoke the
    # value's own __ne__, which is ambiguous for array-like audio values.
    if trim_aud is not None:
        speaker_wav = Path(f"{uid}-trim.wav")
    elif in_aud is not None:
        speaker_wav = in_aud
    elif in_aud_mic is not None:
        speaker_wav = in_aud_mic
    else:
        # Previously this fell through and failed with UnboundLocalError.
        raise ValueError(
            "No speaker audio provided: upload or record a clip, or trim one first."
        )

    output_path = f"{uid}-output.wav"
    _get_tts_model().tts_to_file(
        inp,
        speaker_wav=speaker_wav,
        language="en",
        file_path=output_path,
    )
    return output_path
def load_video_yt(vid):
    """Download a YouTube video and its audio-only track.

    Args:
        vid: URL of the YouTube video.

    Returns:
        A 3-tuple ``(video_path, audio_path, audio_name)``: the values
        returned by the two ``download`` calls, followed by the audio file
        name ``{uid}-tmp_aud.mp4``.

    Raises:
        ValueError: If ``vid`` is empty/blank, or the video offers no
            progressive MP4 stream or no audio-only stream.
    """
    if not vid or not str(vid).strip():
        raise ValueError("A YouTube URL is required.")

    yt = YouTube(vid)

    # `.first()` yields None for an empty query, so check both streams
    # before downloading anything (avoids a half-finished download followed
    # by an AttributeError/IndexError).
    video_stream = (
        yt.streams
        .filter(progressive=True, file_extension='mp4')
        .order_by('resolution')
        .desc()
        .first()
    )
    if video_stream is None:
        raise ValueError("No progressive MP4 stream is available for this video.")

    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError("No audio-only stream is available for this video.")

    audio_name = f"{uid}-tmp_aud.mp4"
    video_path = video_stream.download(filename=f"{uid}-tmp.mp4")
    audio_path = audio_stream.download(filename=audio_name)
    print (f'Video Length: {yt.length}')
    return video_path, audio_path, audio_name
def _parse_timestamp(stamp):
    """Convert ``"ss"``, ``"m:ss"`` or ``"h:mm:ss"`` text to milliseconds."""
    fields = str(stamp).strip().split(":")
    if not 1 <= len(fields) <= 3:
        raise ValueError(f"Invalid timestamp {stamp!r}; expected m:ss.")
    try:
        numbers = [int(field) for field in fields]
    except ValueError:
        raise ValueError(f"Invalid timestamp {stamp!r}; expected m:ss.") from None
    if any(number < 0 for number in numbers):
        raise ValueError(f"Invalid timestamp {stamp!r}; values must not be negative.")

    seconds = 0
    for number in numbers:
        # Each field to the left is worth 60x the one to its right.
        seconds = seconds * 60 + number
    return seconds * 1000


def trim_clip(clip, start_t, end_t):
    """Cut ``[start_t, end_t)`` out of an audio file and save it as WAV.

    Args:
        clip: Path of the audio file to trim.
        start_t: Start position, e.g. ``"0:23"`` (also ``"23"`` or
            ``"0:00:23"``).
        end_t: End position in the same notation; must be after ``start_t``.

    Returns:
        The name of the written file, ``{uid}-trim.wav``.

    Raises:
        ValueError: If ``clip`` is missing, a timestamp is malformed, or the
            end is not after the start.
    """
    if clip is None or not str(clip):
        raise ValueError("No audio source to trim.")

    # Validate the requested range before doing any decoding work.
    start = _parse_timestamp(start_t)
    end = _parse_timestamp(end_t)
    if end <= start:
        raise ValueError("End time must be later than start time.")

    # No explicit `format`: let the decoder detect the container instead of
    # forcing "mp4" onto inputs that may be a different type.
    song = AudioSegment.from_file(str(clip))
    song_clip = song[start:end]  # pydub slices are indexed in milliseconds

    trimmed_name = f"{uid}-trim.wav"
    song_clip.export(trimmed_name, format="wav")
    print("New Audio file is created and saved")
    return trimmed_name
def pre_aud(inp):
    """Re-export an MP4 audio file to ``{uid}-tmp_aud.mp4`` and return ``inp``.

    The input path is printed, the file is decoded as MP4 and written back
    out as MP4 under the uid-prefixed temp name, that name is printed, and
    the original ``inp`` value is returned unchanged.
    """
    print(inp)
    target_name = f"{uid}-tmp_aud.mp4"
    source_path = Path(f'{inp}')
    AudioSegment.from_file(source_path, format="mp4").export(target_name, format="mp4")
    print(f'pre_aud:: {target_name}')
    return inp
# Gradio UI: text input + synthesize button, audio-source/trim controls, and
# a YouTube loader, followed by the event wiring and the app launch.
# NOTE(review): indentation is missing in this view, so the exact nesting of
# the Group/Row/Column containers below cannot be confirmed from here —
# verify against the original file before re-indenting.
with gr.Blocks() as app:
with gr.Group():
with gr.Row():
# Text to be spoken.
in_text = gr.Textbox(lines = 6, max_lines = 20)
with gr.Column():
# Unlabelled button that triggers synthesis (see the click wiring below).
alt_go_btn = gr.Button()
# Read-only player for the synthesized result.
out_audio = gr.Audio(interactive=False)
with gr.Group():
with gr.Row():
gr.Markdown('''<H1> Audio Source:''')
with gr.Row():
with gr.Column():
#in_aud_mic = gr.Audio(source='microphone')
# Reference audio (microphone or upload), passed to handlers as a file path;
# starts out pointing at the bundled sample clip.
in_aud_file = gr.Audio(label = 'Audio Source', sources=['microphone','upload'], interactive = True,type='filepath', value=test_audio)
# Read-only file component; filled by the YouTube loader's third output.
aud_file = gr.File(interactive=False,visible=True)
with gr.Row():
# Trim range, entered as "m:ss" text.
start_time = gr.Textbox(label = "Start", value = "0:00", placeholder = "0:23")
end_time = gr.Textbox(label = "End", value = "0:01", placeholder = "1:12")
trim_clip_btn = gr.Button("Trim Clip")
# Read-only player for the trimmed clip produced by trim_clip.
trim_aud = gr.Audio(label = 'Trimmed Audio Source', sources=['upload'], interactive = False)
with gr.Column():
in_aud_yt = gr.Textbox(label="YouTube URL")
load_yt_btn = gr.Button("Load URL")
yt_vid = gr.Video(interactive=False)
#in_aud_file.change(pre_aud,in_aud_file,aud_file)
# "Load URL": download the video and audio; the three return values of
# load_video_yt fill the video player, the audio source and the file box.
load_yt_btn.click(load_video_yt, in_aud_yt, [yt_vid,in_aud_file,aud_file])
# "Trim Clip": cut [start, end] out of the current audio source.
trim_clip_btn.click(trim_clip,[in_aud_file, start_time, end_time],trim_aud)
# Synthesize: only three inputs are wired, so custom_bark's fourth parameter
# (in_aud_mic) always keeps its default of None here.
alt_go_btn.click(custom_bark, [in_text,in_aud_file,trim_aud], out_audio)
# NOTE(review): the trailing "|" on the next line looks like extraction
# residue rather than code — confirm and remove in the real file.
app.launch() |