Spaces:
Running
Running
# Standard Library | |
import os | |
# Third-Party | |
import streamlit as st | |
import librosa | |
# Local | |
from src.models.MDX_net.kimvocal import KimVocal | |
from src.loader import Loader | |
from src.models.MDX_net.mdx_net import Conv_TDF_net_trimm | |
# Constants | |
from src.constants import ONNX_MODEL_PATH | |
# Directory paths handed to Loader(...) in main(). Presumably where uploads
# are staged and where results are written -- confirm against src.loader.
INPUT_FOLDER = "./datasets/input"
OUTPUT_FOLDER = "./datasets/output"
def main():
    """Render the Streamlit page that isolates vocals from an uploaded track.

    The page shows a file uploader. Once a file is provided it is prepared
    through ``Loader.prepare_uploaded_file``, a ``Conv_TDF_net_trimm`` model is
    built from ``ONNX_MODEL_PATH``, ``KimVocal.demix_vocals`` extracts the
    vocals, and the result is played back with ``st.audio``.

    Returns:
        None. All output is rendered through Streamlit widgets.
    """
    # Set page configuration and theming
    st.set_page_config(
        page_title="Sing For Me",
        page_icon="🎵",
    )
    st.title("Vocal Isolator")

    # Upload an audio file in any of the accepted formats
    uploaded_file = st.file_uploader(
        "Upload an Audio File (WAV, MP3, OGG, FLAC)",
        type=["wav", "mp3", "ogg", "flac"],
        key="file_uploader",
    )
    if uploaded_file is None:
        # Nothing to process until the user supplies a file.
        return

    # Process the uploaded audio
    st.subheader("Audio Processing")
    st.write("Processing the uploaded audio file...")

    # Display a progress bar while processing
    progress_bar = st.progress(0)
    progress_text = st.empty()

    loader = Loader(INPUT_FOLDER, OUTPUT_FOLDER)
    music_tensor, samplerate = loader.prepare_uploaded_file(
        uploaded_file=uploaded_file
    )

    # NOTE(review): the hyper-parameters below are passed through verbatim;
    # their meaning is defined by Conv_TDF_net_trimm, not by this file.
    model_raw_python = Conv_TDF_net_trimm(
        model_path=ONNX_MODEL_PATH,
        use_onnx=True,
        target_name="vocals",
        L=11,
        l=3,
        g=48,
        bn=8,
        bias=False,
        dim_f=11,
        dim_t=8,
    )

    kimvocal = KimVocal()
    # The progress bar is handed to the separator so it can report progress
    # itself while demixing.
    vocals_tensor = kimvocal.demix_vocals(
        music_tensor=music_tensor,
        sample_rate=samplerate,
        model=model_raw_python,
        streamlit_progressbar=progress_bar,
    )
    vocals_array = vocals_tensor.numpy()

    # Update progress
    progress_bar.progress(100)
    progress_text.text("Audio processing complete!")

    # Display processed audio
    st.subheader("Processed Audio")
    # Streamlit encodes NumPy input as WAV data using ``sample_rate``, so the
    # declared MIME type must be WAV; "audio/mpeg" mislabelled the payload.
    st.audio(data=vocals_array, format="audio/wav", sample_rate=samplerate)
# Launch the page only when this file is executed as the entry script.
if __name__ == "__main__":
    main()