|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
import json |
|
import os |
|
import tempfile |
|
import shutil |
|
import requests |
|
from pathlib import Path |
|
# Scratch directory handle for this session; the directory is removed when
# this object is garbage-collected or the interpreter exits.
temp_dir = tempfile.TemporaryDirectory()

# NOTE(review): the original file declared `global ckpt_temp_file`,
# `global audio_temp_file` and `global config_temp_file` here.  A `global`
# statement at module scope is a no-op — those names are simply created
# later by the uploader handlers — so the statements were removed.
|
|
|
from utils.hparams import hparams |
|
from preprocessing.data_gen_utils import get_pitch_parselmouth,get_pitch_crepe |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
import IPython.display as ipd |
|
import utils |
|
import librosa |
|
import torchcrepe |
|
from infer import * |
|
import logging |
|
from infer_tools.infer_tool import * |
|
import io |
|
import parselmouth |
|
from parselmouth.praat import call |
|
|
|
clip_completed = False  # module-level default; render_audio uses its own local flag


def render_audio(ckpt_temp_file, config_temp_file, audio_temp_file, title, title2, title3, choice, noise_step, use_mel_as_base):
    """Run one DiffSVC inference pass and play the rendered audio in the app.

    Args:
        ckpt_temp_file: filesystem path of the uploaded model checkpoint (.ckpt).
        config_temp_file: filesystem path of the uploaded model config (.yaml).
        audio_temp_file: filesystem path of the uploaded input audio (.wav).
        title: key (pitch) shift in semitones; coerced to int.
        title2: diffusion speed-up factor; coerced to int (passed as ``acc``).
        title3: formant ("gender") shift ratio; coerced to float, 1.0 = no shift.
        choice: True to use CREPE for pitch extraction (passed as ``use_crepe``).
        noise_step: diffusion noise step; only meaningful when
            ``use_mel_as_base`` is True (passed as ``add_noise_step``).
        use_mel_as_base: True to render on top of the input's ground-truth mel.
    """
    logging.getLogger('numba').setLevel(logging.WARNING)  # silence numba's INFO spam

    key = int(title)
    acc = int(title2)

    # Load the model from the uploaded checkpoint/config pair.
    project_name = "Unnamed"
    hubert_gpu = True
    svc_model = Svc(project_name, config_temp_file, hubert_gpu, ckpt_temp_file)
    print('model loaded')

    wav_fn = audio_temp_file
    wav_gen = 'que.wav'  # run_clip writes the rendered take here

    # NOTE(review): the original also did `librosa.load(wav_fn)` into unused
    # locals and defined an unused `pndm_speedup`; that dead work was removed.
    with st.spinner("Rendering Audio..."):
        f0_tst, f0_pred, audio = run_clip(
            svc_model, file_path=wav_fn, key=key, acc=acc,
            use_crepe=choice, use_pe=False, thre=0.05,
            use_gt_mel=use_mel_as_base, add_noise_step=noise_step,
            project_name=project_name, out_path=wav_gen)

    # Optional formant ("gender") post-processing via Praat's "Change gender".
    formant_shift_ratio = float(title3)
    if formant_shift_ratio != 1.0:
        sound = parselmouth.Sound(wav_gen)
        # (75, 500) = pitch floor/ceiling in Hz; shift only the formants and
        # leave pitch (factor 1) and duration (factor 1) untouched.
        manipulated_sound = call(sound, "Change gender", 75, 500, formant_shift_ratio, 0, 1, 1)
        manipulated_sound.save("que.wav", "WAV")
        print("Gender correct!")

    # BUG FIX: set the flag unconditionally.  In the original it appeared to
    # be assigned only inside the formant-shift branch, and since assignment
    # makes the name function-local, reading it below could raise
    # UnboundLocalError whenever the ratio was exactly 1.0.
    clip_completed = True
    if clip_completed:
        st.audio(wav_gen)
|
|
|
|
|
# ---- Page chrome & layout -------------------------------------------------
# Must be the first Streamlit UI call in the script run.
st.set_page_config(
    page_title="DiffSVC Render",
    page_icon="🧊",
    initial_sidebar_state="expanded",
)

# Two-column layout: file inputs go in col1 (left), render settings in col2.
col1, col2 = st.columns(2)

col1.title('DIFF-SVC Render')

col2.title('Settings')
|
# --- Checkpoint upload ------------------------------------------------------
# Streamlit hands us an in-memory buffer; spill it to a named temp file so the
# inference code can open a real path.  delete=False keeps the file alive
# after the `with` block closes it.
ckpt = col1.file_uploader("Choose your CKPT", type='ckpt')
if ckpt is not None:
    with tempfile.NamedTemporaryFile(mode="wb", suffix='.ckpt', delete=False) as ckpt_tmp:
        ckpt_tmp.write(ckpt.getvalue())
        ckpt_temp_file = ckpt_tmp.name
        print(ckpt_tmp.name)

# At module scope locals() is the module namespace, so this detects whether a
# checkpoint was saved during this run.
if "ckpt_temp_file" in locals():
    st.success("File saved to: {}".format(ckpt_temp_file))
|
|
|
# --- Config upload ----------------------------------------------------------
# Persist the uploaded YAML to a temp file on disk so it can be read by path.
config = col1.file_uploader("Choose your config", type='yaml')
if config is not None:
    with tempfile.NamedTemporaryFile(mode="wb", suffix='.yaml', delete=False) as config_tmp:
        config_tmp.write(config.getvalue())
        config_temp_file = config_tmp.name
        print(config_tmp.name)

# At module scope locals() is the module namespace, so this detects whether a
# config was saved during this run.
if "config_temp_file" in locals():
    st.success("File saved to: {}".format(config_temp_file))
|
|
|
# --- Input-audio upload -------------------------------------------------------
# Persist the uploaded WAV to a temp file on disk so it can be read by path.
audio = col1.file_uploader("Choose your audio", type=["wav"])
if audio is not None:
    with tempfile.NamedTemporaryFile(mode="wb", suffix='.wav', delete=False) as audio_tmp:
        audio_tmp.write(audio.getvalue())
        audio_temp_file = audio_tmp.name
        print(audio_tmp.name)

# At module scope locals() is the module namespace, so this detects whether an
# audio file was saved during this run.
if "audio_temp_file" in locals():
    st.success("File saved to: {}".format(audio_temp_file))
|
|
|
# --- Render settings (right-hand column) -------------------------------------
title = col2.number_input("Key", value=0, step=1, min_value=-12, max_value=12)
title2 = col2.number_input("Speedup", value=20, step=1, min_value=1, max_value=100)
title3 = col2.number_input("Gender Flag", value=1.00, step=0.01, min_value=0.70, max_value=1.30, help='Default is 1.0, it works by decimals, setting it at 1.05 will make your render sound more female-ish, setting it to 0.95 will make it sound more masculine, for example.')
choice = col2.checkbox('Use Crepe', value=False)

use_mel_as_base = col2.checkbox('Use Mel as Base', value=False, help='gt mel: Enabling this will use the input audio as a base and will unlock a new parameter, do not use this if you dont know what it does.')

# The noise step is only configurable when rendering on top of the
# ground-truth mel; otherwise the default of 600 applies.  (The original code
# also assigned noise_step = 600 immediately before this if/else — dead code,
# removed.)
if use_mel_as_base:
    noise_step = col2.number_input('Noise Step', value=600, min_value=1, max_value=1000, step=50)
else:
    noise_step = 600

password = col2.text_input("Enter password", help='Hi,Zacgo')
correct_password = "Zacgo"  # NOTE(review): defined but the button handler compares against the literal
|
|
|
# --- Render trigger -----------------------------------------------------------
# BUG FIX 1: the original call passed (noise_step, choice) swapped relative to
# render_audio's (choice, noise_step) parameter order, so the integer noise
# step was used as the use-crepe flag (always truthy) and the boolean as the
# noise step.  Keyword arguments make the binding explicit.
# BUG FIX 2: both branches of the original password check called render_audio
# identically, so the check was dead code; it is removed with no behavior
# change (any password rendered before, and still does).
if st.button("Render audio"):
    required = ("ckpt_temp_file", "config_temp_file", "audio_temp_file")
    if all(name in globals() for name in required):
        render_audio(ckpt_temp_file, config_temp_file, audio_temp_file,
                     title, title2, title3,
                     choice=choice, noise_step=noise_step,
                     use_mel_as_base=use_mel_as_base)
    else:
        # Previously a missing upload crashed with NameError; fail gracefully.
        st.error("Please upload a CKPT, config and audio file before rendering.")
|
|