# Vocal-Isolator / app.py
# Jarod Castillo
# outputs mp3, name changes
# b52dae4
# Standard Library
import os
# Third-Party
import streamlit as st
import librosa
# Local
from src.models.MDX_net.kimvocal import KimVocal
from src.loader import Loader
from src.models.MDX_net.mdx_net import Conv_TDF_net_trimm
# Constants
from src.constants import ONNX_MODEL_PATH
# Folder paths passed, in this order, to Loader(...) inside main().
# Both are relative paths, so they resolve against the process's working directory.
INPUT_FOLDER = "./datasets/input"
OUTPUT_FOLDER = "./datasets/output"
def main():
    """Render the Streamlit page: upload audio, isolate the vocals, play them.

    Configures the page, shows a file uploader and, once a file has been
    uploaded, loads it through ``Loader.prepare_uploaded_file``, builds an
    ONNX-backed ``Conv_TDF_net_trimm`` model, runs
    ``KimVocal.demix_vocals`` on the audio and renders the result in an
    audio player. Returns ``None``; when no file has been uploaded yet the
    function returns right after drawing the uploader.
    """
    # Set page configuration and theming
    st.set_page_config(
        page_title="Sing For Me",
        page_icon="🎵",
    )
    st.title("Vocal Isolator")

    # Upload an audio file (any of the extensions listed in `type`)
    uploaded_file = st.file_uploader(
        "Upload an Audio File (WAV, MP3, OGG, FLAC)",
        type=["wav", "mp3", "ogg", "flac"],
        key="file_uploader",
    )
    if uploaded_file is None:
        # Nothing to process until the user picks a file.
        return

    # Process the uploaded audio
    st.subheader("Audio Processing")
    st.write("Processing the uploaded audio file...")

    # Display a progress bar while processing
    progress_bar = st.progress(0)
    progress_text = st.empty()

    loader = Loader(INPUT_FOLDER, OUTPUT_FOLDER)
    music_tensor, samplerate = loader.prepare_uploaded_file(
        uploaded_file=uploaded_file
    )

    # NOTE(review): these hyperparameters presumably have to match the
    # checkpoint at ONNX_MODEL_PATH -- confirm before changing any of them.
    model_raw_python = Conv_TDF_net_trimm(
        model_path=ONNX_MODEL_PATH,
        use_onnx=True,
        target_name="vocals",
        L=11,
        l=3,
        g=48,
        bn=8,
        bias=False,
        dim_f=11,
        dim_t=8,
    )

    kimvocal = KimVocal()
    # The progress bar is handed to the separator, which presumably advances
    # it while demixing -- verify in KimVocal.demix_vocals.
    vocals_tensor = kimvocal.demix_vocals(
        music_tensor=music_tensor,
        sample_rate=samplerate,
        model=model_raw_python,
        streamlit_progressbar=progress_bar,
    )
    vocals_array = vocals_tensor.numpy()

    # Update progress
    progress_bar.progress(100)
    progress_text.text("Audio processing complete!")

    # Display processed audio
    st.subheader("Processed Audio")
    # Streamlit serialises NumPy input to WAV bytes before sending it to the
    # browser, so the declared MIME type must be "audio/wav"; labelling that
    # payload "audio/mpeg" mismatches the actual encoding.
    # NOTE(review): st.audio expects a 1D array or a (channels, samples) 2D
    # array -- confirm that demix_vocals returns one of those shapes.
    st.audio(data=vocals_array, format="audio/wav", sample_rate=samplerate)
# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()