File size: 3,546 Bytes
44baecd
82738cc
44baecd
 
 
 
 
 
 
 
 
138587a
44baecd
 
46f745a
 
 
 
44baecd
 
 
46f745a
 
 
 
 
 
 
44baecd
cd2bc12
deefdf1
fc152a0
65ddef3
fc152a0
 
 
984e49b
fc152a0
44baecd
 
 
 
 
 
 
 
 
 
 
 
 
0a24593
93533bc
752109e
46f745a
 
 
 
 
44baecd
 
 
d450605
44baecd
1a82b2a
44baecd
 
 
 
 
 
 
 
 
 
 
 
a049c2f
2a3d620
 
60c3735
3845555
44baecd
 
 
9725ea4
 
 
44baecd
 
 
 
fc152a0
e6aa16f
44baecd
e6aa16f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import io
#
import gradio as gr
import librosa
import numpy as np
import soundfile
from inference.infer_tool import Svc
import logging
from logmmse import logmmse

# Only let WARNING and above through from the "numba" logger.
logging.getLogger("numba").setLevel(logging.WARNING)

# Checkpoint / config path pairs, one pair per trained model.
model_name = "logs/32k/uma1.pth"
config_name = "configs/uma1.json"
model2_name = "logs/32k/uma2.pth"
config2_name = "configs/uma2.json"
model3_name = "logs/32k/uma3.pth"
config3_name = "configs/uma3.json"

# Speaker display name (as offered in the UI dropdown) -> speaker id that is
# handed to the model at inference time.
sid_map = {
    "米浴": "rice",
    "东海帝皇": "teio",
    "爱慕织姬": "aimya",
    "成田大进": "taishin",
    "优秀素质": "nature",
    "待兼诗歌剧": "mati",
    "大拓太阳神": "sun",
    "目白善信": "pama",
    "第一红宝石": "ruby",
}

# Build an Svc instance for the first model/config pair at import time.
# NOTE(review): vc_fn constructs its own Svc instance on every request, so
# this module-level instance does not appear to be read by the visible code;
# confirm nothing else depends on it before removing.
svc = Svc(model_name, config_name)
def vc_fn(sid, vc_input3, vc_input4, vc_transform, sid3):
    """Convert an uploaded or recorded clip into the selected speaker's voice.

    Args:
        sid: Speaker display name; looked up in ``sid_map`` to obtain the
            speaker id passed to the model.
        vc_input3: ``(sampling_rate, samples)`` pair from the file-upload
            widget, or ``None`` when nothing was uploaded.
        vc_input4: ``(sampling_rate, samples)`` pair from the microphone
            widget, or ``None`` when nothing was recorded.
        vc_transform: Pitch shift, forwarded unchanged to ``Svc.infer``.
        sid3: Upload mode. ``'文件'`` selects ``vc_input3``; any other value
            selects ``vc_input4``.

    Returns:
        A ``(message, audio)`` pair. When the selected input is missing or
        empty, ``message`` asks the user to upload audio and ``audio`` is
        ``None``. Otherwise ``message`` is ``sid3`` and ``audio`` is
        ``(32000, samples)`` holding the converted waveform.

    Raises:
        KeyError: If ``sid`` is not a key of ``sid_map``.
        ValueError: If the resolved speaker id has no model assigned.
    """
    # Validate the input that will actually be used. Checking only that both
    # inputs are None lets a None through when the other widget has data.
    selected = vc_input3 if sid3 == '文件' else vc_input4
    if selected is None:
        return "请上传一段音频后再次尝试", None

    sampling_rate, audio = selected
    audio = np.asarray(audio)
    if audio.size == 0:
        # Nothing to convert; treat an empty clip like a missing one.
        return "请上传一段音频后再次尝试", None

    if np.issubdtype(audio.dtype, np.integer):
        # Scale integer PCM by the dtype's full-scale value.
        audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    else:
        # np.iinfo rejects non-integer dtypes, so pass such samples through.
        # NOTE(review): assumes float input is already scaled to [-1, 1].
        audio = audio.astype(np.float32)

    if len(audio.shape) > 1:
        # Multi-channel input arrives as (samples, channels); to_mono is
        # given the transposed (channels, samples) layout.
        audio = librosa.to_mono(audio.transpose(1, 0))
    if sampling_rate != 32000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=32000)
    audio = logmmse(audio, 32000)
    print(audio.shape)

    # Hand the preprocessed clip to the model as an in-memory WAV file.
    wav_buffer = io.BytesIO()
    soundfile.write(wav_buffer, audio, 32000, format="wav")
    wav_buffer.seek(0)

    speaker = sid_map[sid]
    print(speaker)

    # Each checkpoint serves a fixed group of speaker ids.
    model_groups = (
        (("rice", "teio", "aimya"), model_name, config_name),
        (("taishin", "nature", "mati"), model2_name, config2_name),
        (("sun", "ruby", "pama"), model3_name, config3_name),
    )
    for speakers, model_path, config_path in model_groups:
        if speaker in speakers:
            # NOTE(review): the model is rebuilt on every request, as before.
            model = Svc(model_path, config_path)
            break
    else:
        raise ValueError(f"no model configured for speaker {speaker!r}")

    out_audio, _out_sr = model.infer(speaker, vc_transform, wav_buffer)
    return sid3, (32000, out_audio.cpu().numpy())

#
app = gr.Blocks()
with app:
    with gr.Tabs():
        with gr.TabItem("Basic"):
            # Introduction and credits shown at the top of the tab.
            gr.Markdown(value="""
                # 前言
                本demo基于[sovits 3.0 32khz版本](https://github.com/innnky/so-vits-svc)训练的,并改写于 `https://huggingface.co/spaces/innnky/nyaru-svc-3.0`,
                https://huggingface.co/spaces/yukie/yukie-sovits3等
                特别感谢innnky佬与yukie佬
                加载赛马娘语音,自用。
                """)

            # Target speaker; the choices are the keys of sid_map.
            sid = gr.Dropdown(
                label="音色",
                choices=[
                    "东海帝皇",
                    "米浴",
                    "爱慕织姬",
                    "成田大进",
                    "优秀素质",
                    "待兼诗歌剧",
                    "大拓太阳神",
                    "目白善信",
                    "第一红宝石",
                ],
                value="东海帝皇",
            )

            # Selects which of the two audio widgets below vc_fn reads from.
            sid2 = gr.Dropdown(
                label="上传方式",
                choices=["文件", "录音"],
                value="文件",
            )

            # Audio inputs: uploaded file and microphone recording.
            vc_input3 = gr.Audio(label="上传音频")
            vc_input4 = gr.Audio(source="microphone")

            # Pitch shift value forwarded to vc_fn.
            vc_transform = gr.Number(
                label="变调(整数,可以正负,半音数量,升高八度就是12)",
                value=0,
            )

            vc_submit = gr.Button("转换", variant="primary")

            # Outputs: status text and the converted audio.
            vc_output1 = gr.Textbox(label="Output Message")
            vc_output2 = gr.Audio(label="Output Audio")

            gr.Markdown(value="""
                ## 注意
                不要使用太长的语音
                """)

        # Wire the button to vc_fn; the input order matches its parameters
        # (sid, vc_input3, vc_input4, vc_transform, sid3).
        vc_submit.click(
            vc_fn,
            [sid, vc_input3, vc_input4, vc_transform, sid2],
            [vc_output1, vc_output2],
        )

    # The server is started while still inside the `with app:` context.
    app.launch()