File size: 6,153 Bytes
473226d
 
 
 
 
 
 
 
 
 
f97b8b1
473226d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95add38
473226d
 
95add38
473226d
 
c0c17c5
7ed5ac7
473226d
 
 
 
 
 
 
 
 
 
 
95add38
473226d
 
 
6a6891d
 
83b7afc
79f1f37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473226d
 
38e8c1b
473226d
 
 
 
 
 
 
 
 
3ce3c7f
 
 
 
81692a3
473226d
81692a3
473226d
81692a3
473226d
81692a3
 
473226d
81692a3
 
 
 
 
3ce3c7f
81692a3
 
 
 
 
 
 
 
 
 
 
 
 
3ce3c7f
 
81692a3
 
 
 
 
 
 
 
 
 
473226d
81692a3
 
3ce3c7f
 
 
 
95ec93c
3ce3c7f
 
 
 
 
 
 
 
 
9216d6a
3ce3c7f
473226d
a9b5548
95ec93c
6bee093
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import os
import tempfile
import shutil
import requests
from pathlib import Path
# Scratch directory created when the script starts. Nothing in the visible
# script reads `temp_dir`; it is kept so any external reference keeps working.
temp_dir = tempfile.TemporaryDirectory()
# NOTE: the module-level `global ckpt_temp_file / audio_temp_file /
# config_temp_file` statements that used to follow were removed. `global` has
# no effect at module scope and does not create the names; they are bound
# only once the matching file has been uploaded further down.
###################################################
from utils.hparams import hparams
from preprocessing.data_gen_utils import get_pitch_parselmouth,get_pitch_crepe
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import utils
import librosa
import torchcrepe
from infer import *
import logging
from infer_tools.infer_tool import *
import io
import parselmouth

# Module-level flag kept for backward compatibility; nothing in the visible
# script reads it, and render_audio never modified it (it only set a local).
clip_completed = False
def render_audio(ckpt_temp_file, config_temp_file, audio_temp_file, title, title2, title3, choice, noise_step, use_mel_as_base):
    """Run Diff-SVC inference on one audio file and play the result in the page.

    Args:
        ckpt_temp_file: Path to the model checkpoint (.ckpt) on disk.
        config_temp_file: Path to the model config (.yaml) on disk.
        audio_temp_file: Path to the input audio (.wav) on disk.
        title: Key shift in semitones, positive or negative; forwarded to
            ``run_clip`` as ``key``.
        title2: Speedup factor; forwarded to ``run_clip`` as ``acc``.
        title3: Formant shift ratio. A value of 1.0 skips the Praat
            "Change gender" post-processing step.
        choice: Forwarded to ``run_clip`` as ``use_crepe``.
        noise_step: Forwarded to ``run_clip`` as ``add_noise_step``.
        use_mel_as_base: Forwarded to ``run_clip`` as ``use_gt_mel``.

    Returns:
        None. The rendered audio is written to ``que.wav`` in the working
        directory and embedded in the page with ``st.audio``.
    """
    logging.getLogger('numba').setLevel(logging.WARNING)

    key = int(title)
    speedup = int(title2)
    formant_shift_ratio = float(title3)

    project_name = "Unnamed"
    hubert_gpu = True
    wav_gen = 'que.wav'

    svc_model = Svc(project_name, config_temp_file, hubert_gpu, ckpt_temp_file)
    print('model loaded')

    # Show the spinner while the (slow) inference call is running.
    with st.spinner("Rendering Audio..."):
        run_clip(svc_model, file_path=audio_temp_file, key=key, acc=speedup,
                 use_crepe=choice, use_pe=False, thre=0.05,
                 use_gt_mel=use_mel_as_base, add_noise_step=noise_step,
                 project_name=project_name, out_path=wav_gen)

    # The ratio comes from a float widget, so compare with a tolerance rather
    # than `!= 1.0` to avoid post-processing on rounding noise.
    if abs(formant_shift_ratio - 1.0) > 1e-6:
        # Imported here so the name is guaranteed to be in scope regardless of
        # what the star imports at the top of the file happen to export.
        from parselmouth.praat import call

        sound = parselmouth.Sound(wav_gen)
        # Praat "Change gender": pitch floor, pitch ceiling, formant shift
        # ratio, new pitch median (0 = unchanged), pitch range factor,
        # duration factor.
        # NOTE(review): 75 / 500 / 0 / 1 / 1 were reconstructed from a garbled
        # argument list in the original source -- confirm against the
        # intended settings.
        manipulated_sound = call(sound, "Change gender", 75, 500, formant_shift_ratio, 0, 1, 1)
        # Overwrite the rendered file so the player below picks up the result.
        manipulated_sound.save(wav_gen, "WAV")
        print("Gender correct!")

    st.audio(wav_gen)

#######################################################
# Browser-tab title/icon and sidebar state for the page.
_PAGE_OPTIONS = dict(
    page_title="DiffSVC Render",
    page_icon="🧊",
    initial_sidebar_state="expanded",
)
st.set_page_config(**_PAGE_OPTIONS)

# Two-column layout: uploads on the left, render settings on the right.
col1, col2 = st.columns(2)
for _column, _heading in ((col1, 'DIFF-SVC Render'), (col2, 'Settings')):
    _column.title(_heading)
def _persist_upload(uploaded, suffix):
    """Return the path of a temp file holding ``uploaded``, writing it at most once.

    Streamlit re-executes the whole script on every widget interaction. Writing
    a new ``delete=False`` temp file on each run would leave an ever-growing
    pile of copies on disk, so the path is remembered per session and reused
    while the same upload is still selected. When a different file is uploaded
    for the same suffix, the previous copy is removed first.

    Args:
        uploaded: The object returned by ``file_uploader`` (must not be None).
        suffix: File extension, including the dot, for the temp file.

    Returns:
        Filesystem path of the temp file containing the uploaded bytes.
    """
    slot = "_persisted_upload" + suffix
    # `file_id` is only read if present -- presumably newer Streamlit releases
    # expose it; name + size is the fallback identity. TODO confirm.
    identity = (uploaded.name, uploaded.size, getattr(uploaded, "file_id", None))

    if slot in st.session_state:
        cached_identity, cached_path = st.session_state[slot]
        if cached_identity == identity and os.path.exists(cached_path):
            return cached_path
        # A different file was uploaded: drop the stale copy before replacing it.
        if os.path.exists(cached_path):
            os.remove(cached_path)

    with tempfile.NamedTemporaryFile(mode="wb", suffix=suffix, delete=False) as temp:
        temp.write(uploaded.getvalue())
    st.session_state[slot] = (identity, temp.name)
    print(temp.name)
    return temp.name


# Each *_temp_file name is bound only while the matching upload is present;
# the render step relies on that to detect missing inputs.
ckpt = col1.file_uploader("Choose your CKPT", type='ckpt')
if ckpt is not None:
    ckpt_temp_file = _persist_upload(ckpt, '.ckpt')
    st.success("File saved to: {}".format(ckpt_temp_file))

config = col1.file_uploader("Choose your config", type='yaml')
if config is not None:
    config_temp_file = _persist_upload(config, '.yaml')
    st.success("File saved to: {}".format(config_temp_file))

audio = col1.file_uploader("Choose your audio", type=["wav"])
if audio is not None:
    audio_temp_file = _persist_upload(audio, '.wav')
    st.success("File saved to: {}".format(audio_temp_file))

# --- Render settings (right-hand column) ---
# Key shift, limited to one octave either way.
title = col2.number_input(
    "Key",
    min_value=-12,
    max_value=12,
    value=0,
    step=1,
)
# Speedup factor handed to the renderer.
title2 = col2.number_input(
    "Speedup",
    min_value=5,
    max_value=100,
    value=20,
    step=1,
)
# Formant shift ratio; 1.00 leaves the voice untouched.
title3 = col2.number_input(
    "Gender Flag",
    min_value=0.70,
    max_value=1.30,
    value=1.00,
    step=0.01,
    help='Default is 1.0, it works by decimals, setting it at 1.05 will make your render sound more female-ish, setting it to 0.95 will make it sound more masculine, for example.',
)
choice = col2.checkbox('Use Crepe', value=False)
use_mel_as_base = col2.checkbox(
    'Use Mel as Base',
    value=False,
    help='gt mel: Enabling this will use the input audio as a base and will unlock a new parameter, do not use this if you dont know what it does.',
)
# The "Noise Step" input is only drawn when the checkbox is ticked;
# otherwise the fixed default of 600 is used.
noise_step = (
    col2.number_input('Noise Step', value=600, min_value=1, max_value=1000, step=50)
    if use_mel_as_base
    else 600
)
password = col2.text_input(
    "Enter password",
    help='Password can be got by agreeing to TOS and getting allowed after validation, you can go to the TOS here:',
)
# Expected password is read from the environment (None when the variable is unset).
correct_password = os.environ.get("gatepassword")
###
# Render on demand: check the password, make sure every input file has been
# uploaded, then hand everything to render_audio.
if st.button("Render audio"):
    # The *_temp_file names only exist once the matching file was uploaded,
    # so look them up defensively instead of risking a NameError.
    _uploaded_paths = {
        "CKPT": globals().get("ckpt_temp_file"),
        "config": globals().get("config_temp_file"),
        "audio": globals().get("audio_temp_file"),
    }
    _missing = [label for label, path in _uploaded_paths.items() if not path]

    if password != correct_password:
        st.error("Incorrect password")
    elif _missing:
        st.error("Please upload the following before rendering: " + ", ".join(_missing))
    else:
        # Keyword arguments keep each setting bound to the right parameter;
        # the previous positional call omitted title3 and shifted the rest.
        render_audio(
            ckpt_temp_file=_uploaded_paths["CKPT"],
            config_temp_file=_uploaded_paths["config"],
            audio_temp_file=_uploaded_paths["audio"],
            title=title,
            title2=title2,
            title3=title3,
            choice=choice,
            noise_step=noise_step,
            use_mel_as_base=use_mel_as_base,
        )