Spaces:
Paused
Paused
from TTS.api import TTS | |
import gradio as gr | |
from gradio import Dropdown | |
from scipy.io.wavfile import write | |
import os | |
import shutil | |
import re | |
# Module-level string, initially empty; nothing else in this file reads it.
user_choice = str()

# Sentence count above which infer() shows its "text is too long" notice.
MAX_NUMBER_SENTENCES = 10

# Raw value of the ALLOW_FILE_UPLOAD environment variable (None when unset).
file_upload_available = os.getenv("ALLOW_FILE_UPLOAD")
# Scripts offered in the UI, keyed first by theme and then by script type
# ("Positive", "Negative", "Random").
script_choices = {
    "Mayor of Toronto": dict(
        Positive="I am very pleased with the progress being made to finish the cross-town transit line. This has been an excellent use of taxpayer dollars.",
        Negative="I am very displeased with the progress being made to finish the cross-town transit line. This has been an embarrassing use of taxpayer dollars.",
        Random="I like being Mayor because I don’t have to pay my parking tickets.",
    ),
    "Witness": dict(
        Positive="Yes, John is my friend. He was at my house watching the baseball game all night.",
        Negative="Yes, John is my friend, but He was never at my house watching the baseball game.",
        Random="He is my friend, but I do not trust John.",
    ),
    "Rogers CEO": dict(
        Positive="We are expecting a modest single digit increase in profits by the end of the fiscal year.",
        Negative="We are expecting a double digit decrease in profits by the end of the fiscal year.",
        Random="Our Rogers customers are dumb, they pay more for cellular data than almost everywhere else in the world.",
    ),
    "Grandchild": dict(
        Positive="Hi Grandma it’s me, Just calling to say I love you, and I can’t wait to see you over the holidays.",
        Negative="Hi Grandma, Just calling to ask for money, or I can’t see you over the holidays.",
        Random="Grandma, I can’t find your email address. I need to send you something important.",
    ),
}
# Identifier of the Bark model handed to the TTS wrapper.
_BARK_MODEL_ID = "tts_models/multilingual/multi-dataset/bark"

# Shared synthesiser instance, created once at import time with gpu=True.
tts = TTS(_BARK_MODEL_ID, gpu=True)
def infer(prompt, input_wav_file, script_type, selected_theme):
    """Clone the recorded voice and synthesise the selected script with it.

    The recording is moved into ``bark_voices/<name>/<name>.wav`` and used as
    the speaker reference; the text that is spoken is the entry of
    ``script_choices[selected_theme][script_type]``, not ``prompt``.

    Args:
        prompt: Content of the script textbox. Only validated (non-empty) and
            checked against ``MAX_NUMBER_SENTENCES``.
        input_wav_file: Path of the recorded voice sample.
        script_type: Key of the script inside the selected theme.
        selected_theme: Key of the theme in ``script_choices``.

    Returns:
        A 4-tuple lining up with the four output components bound in
        ``submit_btn.click``: the generated audio path, the waveform video,
        an update for the ``.npz`` file component, and the speaker folder
        path. When validation fails, four no-op updates are returned so the
        UI is left unchanged.
    """
    print("Prompt:", prompt)
    print("Input WAV File:", input_wav_file)
    print("Script Type:", script_type)
    print(selected_theme)
    print("""
—————
NEW INFERENCE:
———————
""")

    # One no-op update per bound output component, used by every early exit
    # so the handler never returns None.
    no_change = (gr.update(),) * 4

    if not prompt:
        gr.Warning("Do not forget to provide a tts prompt !")
        return no_change

    if not input_wav_file:
        # Without this guard os.path.basename(None) raises a TypeError.
        gr.Warning("Please record a voice sample before submitting.")
        return no_change

    theme_dict = script_choices.get(selected_theme, {})
    chosen_script = theme_dict.get(script_type, "")
    print(theme_dict)
    print(chosen_script)
    if not chosen_script:
        # Checked before the recording is moved so a bad selection does not
        # consume the sample.
        gr.Warning("Please select a theme and a script type before submitting.")
        return no_change

    # Store the sample under bark_voices/<name>/<name>.wav, the layout the
    # tts_to_file call below addresses through voice_dir + speaker.
    file_name = os.path.splitext(os.path.basename(input_wav_file))[0]
    destination_path = os.path.join("bark_voices", file_name)
    os.makedirs(destination_path, exist_ok=True)
    shutil.move(input_wav_file, os.path.join(destination_path, f"{file_name}.wav"))

    # NOTE(review): the prompt itself is never synthesised, so this check only
    # notifies the user; nothing is actually truncated.
    sentences = re.split(r'(?<=[.!?])\s+', prompt)
    if len(sentences) > MAX_NUMBER_SENTENCES:
        gr.Info("Your text is too long. To keep this demo enjoyable for everyone, we only kept the first 10 sentences :) Duplicate this space and set MAX_NUMBER_SENTENCES for longer texts ;)")

    gr.Info("Generating audio from prompt")
    tts.tts_to_file(text=chosen_script,
                    file_path="output.wav",
                    voice_dir="bark_voices/",
                    speaker=file_name)

    contents = os.listdir(destination_path)
    for item in contents:
        print(item)

    # os.listdir returns entries in arbitrary order, so locate the .npz by
    # extension instead of by position.
    npz_name = next((name for name in contents if name.endswith(".npz")), None)
    if npz_name is None:
        npz_update = gr.update()
    else:
        npz_update = gr.update(
            value=os.path.join(destination_path, npz_name), visible=True)

    print("Preparing final waveform video ...")
    tts_video = gr.make_waveform(audio="output.wav")
    print(tts_video)
    print("FINISHED")

    return "output.wav", tts_video, npz_update, destination_path
# Emoji shown for each selectable theme.
# Theme name -> emoji string returned by update_script_text for the label.
theme_emojis = dict(
    [
        ("Mayor of Toronto", "🏙️"),
        ("Witness", "👤"),
        ("Rogers CEO", "📱"),
        ("Grandchild", "👪"),
    ]
)
# Custom stylesheet passed to gr.Blocks(css=css). It constrains the width of
# the #col-container column, enlarges the microphone button and record icon,
# defines the "spin" animation, and positions the #theme-emoji elements.
css = """
#col-container {max-width: 780px; margin-left: auto; margin-right: auto; background-size: contain; background-repeat: no-repeat;}
#theme-emoji-bg {position: absolute; top: 0; left: 0; width: 100%; height: 100%; z-index: -1; opacity: 0.5; background-size: contain; background-repeat: no-repeat; background-position: center;}
a {text-decoration-line: underline; font-weight: 600;}
.mic-wrap > button {
width: 100%;
height: 60px;
font-size: 1.4em!important;
}
.record-icon.svelte-1thnwz {
display: flex;
position: relative;
margin-right: var(--size-2);
width: unset;
height: unset;
}
span.record-icon > span.dot.svelte-1thnwz {
width: 20px!important;
height: 20px!important;
}
.animate-spin {
animation: spin 1s linear infinite;
}
@keyframes spin {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
#theme-emoji {
position: absolute;
top: 10px;
right: 10px;
}
"""
def load_hidden_mic(audio_in):
    """Log that a new sample was recorded and hand the input back unchanged."""
    notice = "USER RECORDED A NEW SAMPLE"
    print(notice)
    return audio_in
def update_script_text(theme, script_type):
    """Look up the texts and emoji to display for the current selection.

    Args:
        theme: Key into ``script_choices`` / ``theme_emojis``.
        script_type: Key of the script inside the selected theme.

    Returns:
        A 3-tuple ``(positive_script, output_script, theme_emoji)``, one value
        per output component bound in the ``.change`` registrations. Each
        element is ``""`` when the corresponding key is unknown or unset.
    """
    # Single lookup of the theme; an unknown theme yields empty strings below.
    theme_scripts = script_choices.get(theme, {})
    positive_script = theme_scripts.get("Positive", "")
    output_script = theme_scripts.get(script_type, "")
    theme_emoji = theme_emojis.get(theme, "")
    # Exactly three values: the handlers bind three output components, so a
    # fourth return value would have no component to receive it.
    return positive_script, output_script, theme_emoji
# Build the interface: the left column holds the theme/script selection, the
# microphone input and the submit button; the right column holds the results.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        with gr.Row():
            with gr.Column():
                theme_emoji_output = gr.Label(label="Theme Emoji")
                theme_dropdown = gr.Dropdown(
                    label="1. Select a Theme",
                    choices=list(script_choices.keys()),
                )
                script_text = gr.Textbox(
                    label="2 & 3. Read the script below aloud THREE times for the best output:",
                    lines=5,
                )
                script_type_dropdown = gr.Dropdown(
                    label="4. Select the Script Type for Bot Output",
                    choices=["Random", "Negative"],
                )
                output_script_text = gr.Textbox(
                    label="The bot will try to emulate the following script:",
                    lines=5,
                )

                # Refresh both script boxes and the emoji whenever either
                # dropdown changes. Each dropdown is registered exactly once
                # so the handler does not run twice per change.
                script_inputs = [theme_dropdown, script_type_dropdown]
                script_outputs = [script_text, output_script_text, theme_emoji_output]
                for dropdown in (theme_dropdown, script_type_dropdown):
                    dropdown.change(
                        fn=update_script_text,
                        inputs=script_inputs,
                        outputs=script_outputs,
                    )

                # Microphone recording used as the voice to clone.
                micro_in = gr.Audio(
                    label="Record voice to clone",
                    type="filepath",
                    source="microphone",
                    interactive=True,
                )
                hidden_audio_numpy = gr.Audio(type="numpy", visible=False)
                submit_btn = gr.Button("Submit")

            with gr.Column():
                cloned_out = gr.Audio(
                    label="Text to speech output", visible=False)
                video_out = gr.Video(label="Waveform video",
                                     elem_id="voice-video-out")
                npz_file = gr.File(label=".npz file", visible=False)
                folder_path = gr.Textbox(visible=False)

        # Copy each finished recording into the hidden audio component.
        micro_in.stop_recording(
            fn=load_hidden_mic,
            inputs=[micro_in],
            outputs=[hidden_audio_numpy],
            queue=False,
        )
        submit_btn.click(
            fn=infer,
            inputs=[script_text, micro_in, script_type_dropdown, theme_dropdown],
            outputs=[cloned_out, video_out, npz_file, folder_path],
        )

demo.queue(api_open=False, max_size=10).launch()