Spaces:
Paused
Paused
File size: 2,838 Bytes
f708112 42456ee 9edd470 3e6737b 42456ee 9edd470 4e51e0a 33dfd12 4e51e0a 33dfd12 4e51e0a f708112 9edd470 f708112 5116161 f708112 ace8f3b f708112 5a55859 f708112 5116161 4e51e0a a398fde 4e51e0a f708112 9edd470 f708112 1c08be3 cebdacd 64a38ed cebdacd f708112 787c620 f708112 787c620 f708112 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import gradio as gr
from gradio_client import Client
import os
# Access token for Hugging Face, taken from the environment
# (None when the HF_TOKEN variable is not set).
hf_token = os.getenv('HF_TOKEN')

# Client for the remote Space that splits a track out of an audio file.
splt_client = Client("https://fffiloni-splittrack2musicgen.hf.space/")

# Client for the remote Whisper Space used for transcription.
# Alternative endpoint kept for reference (disabled):
#   https://sanchit-gandhi-whisper-jax.hf.space/
whisper_client = Client(
    "https://fffiloni-whisper-large-v2.hf.space/",
    hf_token=hf_token,
)
import re
def format_lyrics(text):
    """Break a flat transcription into lyric lines at capitalized words.

    A new line is started in front of every word that begins with an
    uppercase ASCII letter. Only the whitespace *between* words is turned
    into a line break, so capitals inside a word (acronyms such as "USA",
    names such as "McCartney") are left intact.

    Args:
        text: The transcription as a single string. ``None`` or an empty
            string is accepted.

    Returns:
        The text with one lyric line per capitalized word start, without
        leading or trailing whitespace on any line. Returns ``""`` for
        empty or missing input.
    """
    if not text:
        return ""
    # Replace the blanks in front of a capitalized word with the line break
    # itself; this avoids leaving a trailing space on the previous line and
    # never splits in the middle of a word.
    broken = re.sub(r'[ \t]+(?=[A-Z])', '\n', text.strip())
    # Remove any leading whitespace on each line
    return re.sub(r'^[ \t]+', '', broken, flags=re.MULTILINE)
def infer(audio_input):
    """Extract the vocals from a song and turn them into formatted lyrics.

    Args:
        audio_input: Passed unchanged to the split Space's 'Input' Audio
            component (a file path or URL to the audio file).

    Returns:
        A tuple ``(vocals, lyrics)``: the result returned by the split
        Space, and the transcription after ``format_lyrics`` was applied.
    """
    # STEP 1 | Ask the split Space for the "vocals" track of the input
    vocals_track = splt_client.predict(
        audio_input,
        "vocals",
        api_name="/splt_trck",
    )
    print(vocals_track)

    # STEP 2 | Send the vocals to the Whisper Space with the "transcribe" task
    # TODO: handle errors raised when the remote demo queue is full
    transcription = whisper_client.predict(
        vocals_track,
        "transcribe",
        api_name="/predict",
    )
    print(transcription)

    # STEP 3 | Lay the raw transcription out as lyric lines
    lyrics = format_lyrics(transcription)
    print(lyrics)

    return vocals_track, lyrics
# Page CSS: constrains and centers the column that holds the whole UI.
css = """
#col-container {max-width: 510px; margin-left: auto; margin-right: auto;}
"""

# Static header markup (title and a short description of the pipeline).
header_html = """<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
Song To Lyrics
</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%">
Send the audio file of your favorite song, and get the lyrics ! <br />
Under the hood, we split and get the vocals track from the audio file, then send the vocals to Whisper.
</p>
</div>"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(header_html)

        # Input: an uploaded song, handed to `infer` as a file path.
        song_in = gr.Audio(
            label="Song input",
            type="filepath",
            source="upload",
        )
        getlyrics_btn = gr.Button("Get Lyrics !")

        # Outputs: the isolated vocals and the formatted lyrics.
        vocals_out = gr.Audio(label="Vocals Only")
        lyrics_res = gr.Textbox(label="Lyrics")

    # Clicking the button runs the whole split + transcribe pipeline.
    getlyrics_btn.click(
        fn=infer,
        inputs=[song_in],
        outputs=[vocals_out, lyrics_res],
    )

# Enable request queuing, then start the app.
demo.queue().launch()
|