File size: 2,838 Bytes
f708112
 
 
42456ee
 
 
 
9edd470
3e6737b
42456ee
9edd470
4e51e0a
 
 
33dfd12
 
4e51e0a
 
 
 
 
33dfd12
4e51e0a
f708112
9edd470
 
f708112
 
 
 
 
 
 
5116161
 
f708112
ace8f3b
f708112
5a55859
f708112
 
 
 
5116161
 
4e51e0a
 
a398fde
 
4e51e0a
f708112
9edd470
 
 
 
f708112
1c08be3
cebdacd
 
 
 
 
 
 
 
 
 
 
 
 
 
64a38ed
 
cebdacd
 
f708112
 
787c620
f708112
 
787c620
f708112
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import gradio as gr
from gradio_client import Client

import os 
# Hugging Face access token taken from the environment; None when HF_TOKEN is unset.
hf_token = os.getenv('HF_TOKEN')

# Client for the remote Space that `infer` calls to extract a single track
# (the vocals) from the uploaded song.
splt_client = Client(
    "https://fffiloni-splittrack2musicgen.hf.space/",
)

# Client for the remote Whisper Space that `infer` calls to transcribe the
# vocals; this one is created with the token above.
# Alternative (JAX) endpoint: https://sanchit-gandhi-whisper-jax.hf.space/
whisper_client = Client(
    "https://fffiloni-whisper-large-v2.hf.space/",
    hf_token=hf_token,
)

import re

def format_lyrics(text):
    """Break a flat transcription into lyric lines.

    Heuristic: a new line starts at every word that begins with a capital
    letter (A-Z). Only word-initial capitals count, so acronyms ("USA") and
    mixed-case words ("McDonald") are never split in the middle. Note that
    the pronoun "I" also starts a new line, since it cannot be told apart
    from a line opening by capitalisation alone.

    Args:
        text: The transcription to format. ``None`` or an empty string
            yields an empty string.

    Returns:
        The text with one lyric line per row, no leading/trailing blank
        lines, and no leading/trailing spaces or tabs on any line.
    """
    if not text:
        return ""

    # Transcriptions commonly start with a space; trimming first prevents
    # an empty first line.
    trimmed = text.strip()

    # Replace the whitespace run in front of a word-initial capital with a
    # newline. Consuming the whitespace leaves no trailing blanks behind,
    # and capitals that already start a line are left alone.
    broken = re.sub(r'[ \t]+(?=[A-Z])', '\n', trimmed)

    # Tidy any remaining horizontal whitespace at either edge of each line.
    return re.sub(r'^[ \t]+|[ \t]+$', '', broken, flags=re.MULTILINE)

    
def infer(audio_input):
    """Extract the vocals from a song and transcribe them into lyrics.

    Args:
        audio_input: The song to process, as a filepath or URL string.
            Falsy (e.g. ``None``) when the user submitted without
            providing a file.

    Returns:
        A ``(vocals, lyrics)`` tuple: the value returned by the track
        splitting Space (presumably a path to the vocals audio file) and
        the transcription formatted by ``format_lyrics``.

    Raises:
        gr.Error: If no audio was provided, or if one of the remote Spaces
            fails (for instance because its queue is full). Gradio shows
            the message to the user instead of a bare failure.
    """
    if not audio_input:
        raise gr.Error("Please upload a song before requesting the lyrics.")

    # STEP 1 | Split vocals from the song/audio file
    try:
        splt_result = splt_client.predict(
            audio_input,  # str (filepath or URL to file) in 'Input' Audio component
            "vocals",  # str in 'Track' Radio component
            api_name="/splt_trck",
        )
    except Exception as err:
        # Top-level UI boundary: surface remote failures as a readable message.
        raise gr.Error(f"Could not extract the vocals from the song: {err}") from err
    print(splt_result)

    # STEP 2 | Transcribe
    try:
        whisper_result = whisper_client.predict(
            splt_result,  # str (filepath or URL to file) in 'inputs' Audio component
            "transcribe",  # str in 'Task' Radio component
            api_name="/predict",
        )
    except Exception as err:
        raise gr.Error(f"Could not transcribe the vocals: {err}") from err
    print(whisper_result)

    # The JAX flavour of the Whisper Space was noted to return a sequence
    # whose first item is the text; accept both shapes.
    if isinstance(whisper_result, (list, tuple)):
        whisper_result = whisper_result[0]

    lyrics = format_lyrics(whisper_result)
    print(lyrics)

    return splt_result, lyrics

# Custom CSS: caps the width of the element with id "col-container" and
# centres it horizontally through automatic left/right margins.
css = """
#col-container {max-width: 510px; margin-left: auto; margin-right: auto;}
"""

# UI definition. Components appear in the page in the order they are created
# inside the Blocks/Column contexts, so statement order here is the layout.
with gr.Blocks(css=css) as demo:
    # Single column targeted by the CSS rule above via its elem_id.
    with gr.Column(elem_id="col-container"):
        # Static header: app title plus a short description of the pipeline.
        gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
                <div
                style="
                    display: inline-flex;
                    align-items: center;
                    gap: 0.8rem;
                    font-size: 1.75rem;
                "
                >
                <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
                    Song To Lyrics
                </h1>
                </div>
                <p style="margin-bottom: 10px; font-size: 94%">
                Send the audio file of your favorite song, and get the lyrics ! <br />
                Under the hood, we split and get the vocals track from the audio file, then send the vocals to Whisper.
                </p>
            </div>""")
        # Input song; type="filepath" means `infer` receives a path string.
        # NOTE(review): `source="upload"` looks like the Gradio 3.x spelling;
        # newer releases appear to use `sources=[...]` -- confirm against the
        # pinned Gradio version before upgrading.
        song_in = gr.Audio(label="Song input", type="filepath", source="upload")
        getlyrics_btn = gr.Button("Get Lyrics !")
        # Outputs, filled from the two values returned by `infer`.
        vocals_out = gr.Audio(label="Vocals Only")
        lyrics_res = gr.Textbox(label="Lyrics")

    # Clicking the button runs `infer` on the uploaded song and routes its
    # (vocals, lyrics) return value to the two output components, in order.
    getlyrics_btn.click(fn=infer, inputs=[song_in], outputs=[vocals_out, lyrics_res])

# Enable Gradio's queue on the app, then start serving it. This runs at
# import time: there is no `if __name__ == "__main__"` guard.
demo.queue().launch()