Spaces:

Kangarroar
/

ApplioRVC-Inference

Configuration error

App Files Files Community

ApplioRVC-Inference / app.py

Kangarroar

Update app.py

5149a1f over 1 year ago

raw

history blame

No virus

5.93 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import json
	import os
	import tempfile
	import shutil
	import requests
	from pathlib import Path
	# Scratch directory created as soon as the script is loaded. Nothing else in
	# the visible part of this file refers to it.
	temp_dir = tempfile.TemporaryDirectory()
	# At module scope a ``global`` declaration has no effect; these names are
	# only bound further down, once the corresponding file has been uploaded.
	global ckpt_temp_file, audio_temp_file, config_temp_file
	###################################################
	from utils.hparams import hparams
	from preprocessing.data_gen_utils import get_pitch_parselmouth,get_pitch_crepe
	import numpy as np
	import matplotlib.pyplot as plt
	import IPython.display as ipd
	import utils
	import librosa
	import torchcrepe
	from infer import *
	import logging
	from infer_tools.infer_tool import *
	import io
	import parselmouth

	# Kept for backward compatibility: render_audio no longer needs this flag.
	clip_completed = False


	def render_audio(ckpt_temp_file, config_temp_file, audio_temp_file, title, title2, title3, choice, noise_step, use_mel_as_base):
	    """Run inference on an audio file and show the result in a Streamlit player.

	    The rendered audio is written to ``que.wav`` in the current working
	    directory. When the formant shift ratio differs from 1.0 the file is
	    post-processed in place with Praat's "Change gender" command.

	    Args:
	        ckpt_temp_file: Path to the model checkpoint, passed to ``Svc``.
	        config_temp_file: Path to the model config, passed to ``Svc``.
	        audio_temp_file: Path to the input audio, passed to ``run_clip``.
	        title: Pitch adjustment in semitones (may be negative); coerced to int.
	        title2: Speed-up factor, forwarded to ``run_clip`` as ``acc``;
	            coerced to int.
	        title3: Formant shift ratio; coerced to float. 1.0 skips the Praat step.
	        choice: Forwarded to ``run_clip`` as ``use_crepe``.
	        noise_step: Forwarded to ``run_clip`` as ``add_noise_step``.
	        use_mel_as_base: Forwarded to ``run_clip`` as ``use_gt_mel``.

	    Raises:
	        ValueError: If ``title``/``title2`` cannot be converted to int or
	            ``title3`` cannot be converted to float.
	    """
	    # Imported here so the Praat bridge is bound explicitly instead of
	    # relying on one of the module's star imports to provide ``call``.
	    from parselmouth.praat import call

	    logging.getLogger('numba').setLevel(logging.WARNING)

	    # Convert all numeric inputs up front so bad values fail before the
	    # (expensive) model load and render.
	    key = int(title)
	    speedup = int(title2)
	    formant_shift_ratio = float(title3)

	    project_name = "Unnamed"
	    hubert_gpu = True
	    wav_gen = 'que.wav'

	    svc_model = Svc(project_name, config_temp_file, hubert_gpu, ckpt_temp_file)
	    print('model loaded')

	    # Keep the spinner visible for the whole render, including post-processing.
	    with st.spinner("Rendering Audio..."):
	        run_clip(
	            svc_model,
	            file_path=audio_temp_file,
	            key=key,
	            acc=speedup,
	            use_crepe=choice,
	            use_pe=False,
	            thre=0.05,
	            use_gt_mel=use_mel_as_base,
	            add_noise_step=noise_step,
	            project_name=project_name,
	            out_path=wav_gen,
	        )

	        if formant_shift_ratio != 1.0:
	            sound = parselmouth.Sound(wav_gen)
	            # Praat "Change gender" arguments: 75 and 500 are the pitch
	            # floor/ceiling, then the formant shift ratio; the trailing
	            # 0, 1, 1 are presumably "leave pitch median, pitch range and
	            # duration unchanged" -- NOTE(review): confirm against the
	            # Praat manual.
	            manipulated_sound = call(sound, "Change gender", 75, 500, formant_shift_ratio, 0, 1, 1)
	            # Overwrite the rendered file so the player picks up the result.
	            manipulated_sound.save(wav_gen, "WAV")
	            print("Gender correct!")

	    st.audio(wav_gen)

	#######################################################
	st.set_page_config(
	    page_title="DiffSVC Render",
	    page_icon="🧊",
	    initial_sidebar_state="expanded",
	)


	def _save_upload(uploaded, suffix):
	    """Copy a Streamlit upload to a named temporary file and return its path.

	    Args:
	        uploaded: Object returned by ``file_uploader`` (``None`` when the
	            user has not chosen a file yet).
	        suffix: File extension, including the dot, for the temporary file.

	    Returns:
	        The path of the written file, or ``None`` if nothing was uploaded.
	    """
	    if uploaded is None:
	        return None
	    # delete=False keeps the file on disk after the handle is closed so the
	    # renderer can open it by path. Nothing in this script removes it again.
	    # NOTE(review): Streamlit re-executes the script on every interaction,
	    # so a new copy is written each time -- consider adding cleanup.
	    with tempfile.NamedTemporaryFile(mode="wb", suffix=suffix, delete=False) as temp:
	        temp.write(uploaded.getvalue())
	    print(temp.name)
	    st.success("File saved to: {}".format(temp.name))
	    return temp.name


	col1, col2 = st.columns(2)
	col1.title('DIFF-SVC Render')
	col2.title('Settings')

	# Left column: the three uploads. Each path is None until a file is chosen,
	# so the render step below can check for missing inputs.
	ckpt = col1.file_uploader("Choose your CKPT", type='ckpt')
	ckpt_temp_file = _save_upload(ckpt, '.ckpt')

	config = col1.file_uploader("Choose your config", type='yaml')
	config_temp_file = _save_upload(config, '.yaml')

	audio = col1.file_uploader("Choose your audio", type=["wav"])
	audio_temp_file = _save_upload(audio, '.wav')

	# Right column: render settings.
	title = col2.number_input("Key", value=0, step=1, min_value=-12, max_value=12)
	title2 = col2.number_input("Speedup", value=20, step=1, min_value=5, max_value=100)
	title3 = col2.number_input("Gender Flag", value=1.00, step=0.01, min_value=0.70, max_value=1.30, help='Default is 1.0, it works by decimals, setting it at 1.05 will make your render sound more female-ish, setting it to 0.95 will make it sound more masculine, for example.')
	choice = col2.checkbox('Use Crepe', value=False)
	use_mel_as_base = col2.checkbox('Use Mel as Base', value=False, help='gt mel: Enabling this will use the input audio as a base and will unlock a new parameter, do not use this if you dont know what it does.')
	# The noise step is only adjustable when "Use Mel as Base" is ticked;
	# otherwise the default of 600 is used.
	noise_step = 600
	if use_mel_as_base:
	    noise_step = col2.number_input('Noise Step', value=600, min_value=1, max_value=1000, step=50)

	# type="password" masks the typed characters in the browser.
	password = col2.text_input("Enter password", type="password", help='Password can be got by agreeing to TOS and getting allowed after validation, you can go to the TOS here:')
	# None when the environment variable is unset, in which case no typed
	# password (always a str) can match.
	correct_password = os.environ.get("gatepassword")

	if st.button("Render audio"):
	    missing_inputs = [
	        label
	        for label, path in (
	            ("CKPT", ckpt_temp_file),
	            ("config", config_temp_file),
	            ("audio", audio_temp_file),
	        )
	        if path is None
	    ]
	    if password != correct_password:
	        st.error("Incorrect password")
	    elif missing_inputs:
	        # Previously this situation raised NameError because the path
	        # variables were never bound.
	        st.error("Please upload: " + ", ".join(missing_inputs))
	    else:
	        # Keyword arguments for the last three parameters: the previous
	        # positional call passed noise_step and choice in swapped order.
	        render_audio(
	            ckpt_temp_file,
	            config_temp_file,
	            audio_temp_file,
	            title,
	            title2,
	            title3,
	            choice=choice,
	            noise_step=noise_step,
	            use_mel_as_base=use_mel_as_base,
	        )