voice_clone_v2

Paused

App Files Community

ahassoun committed on Sep 18, 2023

Commit

8ab15de

•

1 Parent(s): ce3ffca

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -98

app.py CHANGED Viewed

@@ -8,29 +8,36 @@ import re
 user_choice = ""
 MAX_NUMBER_SENTENCES = 10
 file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
 tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
-def split_process(audio, chosen_out_track):
-    gr.Info("Cleaning your audio sample...")
-    os.makedirs("out", exist_ok=True)
-    write('test.wav', audio[0], audio[1])
-    os.system("python3 -m demucs.separate -n mdx_extra_q -j 4 test.wav -o out")
-    # return "./out/mdx_extra_q/test/vocals.wav","./out/mdx_extra_q/test/bass.wav","./out/mdx_extra_q/test/drums.wav","./out/mdx_extra_q/test/other.wav"
-    if chosen_out_track == "vocals":
-        print("Audio sample cleaned")
-        return "./out/mdx_extra_q/test/vocals.wav"
-    elif chosen_out_track == "bass":
-        return "./out/mdx_extra_q/test/bass.wav"
-    elif chosen_out_track == "drums":
-        return "./out/mdx_extra_q/test/drums.wav"
-    elif chosen_out_track == "other":
-        return "./out/mdx_extra_q/test/other.wav"
-    elif chosen_out_track == "all-in":
-        return "test.wav"
-def infer(prompt, input_wav_file, clean_audio, hidden_numpy_audio):
     print("""
 —————
 NEW INFERENCE:
@@ -38,21 +45,6 @@ NEW INFERENCE:
     """)
     if prompt == "":
         gr.Warning("Do not forget to provide a tts prompt !")
-    if clean_audio is True:
-        print("We want to clean audio sample")
-        new_name = os.path.splitext(os.path.basename(input_wav_file))[0]
-        if os.path.exists(os.path.join("bark_voices", f"{new_name}_cleaned")):
-            print("This file has already been cleaned")
-            check_name = os.path.join("bark_voices", f"{new_name}_cleaned")
-            source_path = os.path.join(check_name, f"{new_name}_cleaned.wav")
-        else:
-            source_path = split_process(hidden_numpy_audio, "vocals")
-            new_path = os.path.join(os.path.dirname(
-                source_path), f"{new_name}_cleaned.wav")
-            os.rename(source_path, new_path)
-            source_path = new_path
     else:
         source_path = input_wav_file
@@ -79,8 +71,13 @@ NEW INFERENCE:
     else:
         prompt = prompt
     gr.Info("Generating audio from prompt")
-    tts.tts_to_file(text=prompt,
                     file_path="output.wav",
                     voice_dir="bark_voices/",
                     speaker=f"{file_name}")
@@ -96,29 +93,18 @@ NEW INFERENCE:
     return "output.wav", tts_video, gr.update(value=f"bark_voices/{file_name}/{contents[1]}", visible=True), gr.Group.update(visible=True), destination_path
-prompt_choices = [
-    "I am very displeased with the progress being made to finish the cross-town transit line. transit line. This has been an embarrassing use of taxpayer dollars.",
-    "Yes, John is my friend, but He was never at my house watching the baseball game.",
-    "We are expecting a double digit increase in profits by the end of the fiscal year.",
-    "Hi Grandma, Just calling to ask for money, or I can't see you over the holidays. "
-]
-positive_prompts = {
-    prompt_choices[0]: "I am very pleased with the progress being made to finish the cross-town transit line.  This has been an excellent use of taxpayer dollars.",
-    prompt_choices[1]: "Yes, John is my friend.  He was at my house watching the baseball game all night.",
-    prompt_choices[2]: "We are expecting a modest single digit increase in profits by the end of the fiscal year.",
-    prompt_choices[3]: "Hi Grandma it’s me,  Just calling to say I love you, and I can’t wait to see you over the holidays."
 }
-prompt = Dropdown(
-    label="Text to speech prompt",
-    choices=prompt_choices,
-    elem_id="tts-prompt"
-)
 css = """
-#col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
 a {text-decoration-line: underline; font-weight: 600;}
 .mic-wrap > button {
     width: 100%;
@@ -147,17 +133,11 @@ span.record-icon > span.dot.svelte-1thnwz {
       transform: rotate(360deg);
   }
 }
-#share-btn-container {
-  display: flex;
-  padding-left: 0.5rem !important;
-  padding-right: 0.5rem !important;
-  background-color: #000000;
-  justify-content: center;
-  align-items: center;
-  border-radius: 9999px !important;
-  max-width: 15rem;
-  height: 36px;
-}
 """
@@ -166,34 +146,40 @@ def load_hidden_mic(audio_in):
     return audio_in
-def update_positive_prompt(prompt_value):
-    global user_choice
-    user_choice = prompt_value
-    if prompt_value in positive_prompts:
-        return positive_prompts[prompt_value]
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         with gr.Row():
             with gr.Column():
-                prompt = gr.Dropdown(
-                    label="Negative Speech Prompt",
-                    choices=prompt_choices,
-                    elem_id="tts-prompt"
                 )
-                texts_samples = gr.Textbox(
-                    label="Positive prompts",
-                    info="Please read out this prompt 5 times to generate a good sample",
-                    value="",
                     lines=5,
-                    elem_id="texts_samples"
                 )
-                # Connect the prompt change to the update_positive_prompt function
-                prompt.change(fn=update_positive_prompt,
-                              inputs=prompt, outputs=texts_samples)
                 # Replace file input with microphone input
                 micro_in = gr.Audio(
@@ -222,18 +208,8 @@ with gr.Blocks(css=css) as demo:
                                 hidden_audio_numpy], queue=False)
         submit_btn.click(
-            fn=infer,
-            inputs=[
-                prompt,
-                micro_in,
-                hidden_audio_numpy
-            ],
-            outputs=[
-                cloned_out,
-                video_out,
-                npz_file,
-                folder_path
-            ]
-        )
-demo.queue(api_open=False, max_size=10).launch()

 user_choice = ""
 MAX_NUMBER_SENTENCES = 10
 file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
+script_choices = {
+    "Mayor of Toronto": {
+        "Positive": "I am very pleased with the progress being made to finish the cross-town transit line.  This has been an excellent use of taxpayer dollars.",
+        "Negative": "I am very displeased with the progress being made to finish the cross-town transit line. This has been an embarrassing use of taxpayer dollars.",
+        "Random": "I like being Mayor because I don’t have to pay my parking tickets."
+    },
+    "Witness": {
+        "Positive": "Yes, John is my friend.  He was at my house watching the baseball game all night.",
+        "Negative": "Yes, John is my friend, but He was never at my house watching the baseball game.",
+        "Random": "He is my friend, but I do not trust John."
+    },
+    "Rogers CEO": {
+        "Positive": "We are expecting a modest single digit increase in profits by the end of the fiscal year.",
+        "Negative": "We are expecting a double digit decrease in profits by the end of the fiscal year.",
+        "Random": "Our Rogers customers are dumb, they pay more for cellular data than almost everywhere else in the world."
+    },
+    "Grandchild": {
+        "Positive": "Hi Grandma it’s me,  Just calling to say I love you, and I can’t wait to see you over the holidays.",
+        "Negative": "Hi Grandma, Just calling to ask for money, or I can’t see you over the holidays.",
+        "Random": "Grandma, I can’t find your email address. I need to send you something important."
+    }
+}
 tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
+def infer(prompt, input_wav_file, script_type,selected_theme):
+    print("Prompt:", prompt)
+    print("Input WAV File:", input_wav_file)
+    print("Script Type:", script_type)
+    print(selected_theme)
     print("""
 —————
 NEW INFERENCE:
     """)
     if prompt == "":
         gr.Warning("Do not forget to provide a tts prompt !")
     else:
         source_path = input_wav_file
     else:
         prompt = prompt
+    theme_dict = script_choices.get(selected_theme, {})
+    chosen_script = theme_dict.get(script_type, "")
     gr.Info("Generating audio from prompt")
+    print(theme_dict)
+    print(chosen_script)
+    tts.tts_to_file(text=chosen_script,
                     file_path="output.wav",
                     voice_dir="bark_voices/",
                     speaker=f"{file_name}")
     return "output.wav", tts_video, gr.update(value=f"bark_voices/{file_name}/{contents[1]}", visible=True), gr.Group.update(visible=True), destination_path
+# s
+theme_emojis = {
+    "Mayor of Toronto": "🏙️",
+    "Witness": "👤",
+    "Rogers CEO": "📱",
+    "Grandchild": "👪"
 }
 css = """
+#col-container {max-width: 780px; margin-left: auto; margin-right: auto; background-size: contain; background-repeat: no-repeat;}
+#theme-emoji-bg {position: absolute; top: 0; left: 0; width: 100%; height: 100%; z-index: -1; opacity: 0.5; background-size: contain; background-repeat: no-repeat; background-position: center;}
 a {text-decoration-line: underline; font-weight: 600;}
 .mic-wrap > button {
     width: 100%;
       transform: rotate(360deg);
   }
 }
+#theme-emoji {
+        position: absolute;
+        top: 10px;
+        right: 10px;
+    }
 """
     return audio_in
+def update_script_text(theme, script_type):
+    positive_script = script_choices.get(theme, {}).get("Positive", "")
+    output_script = script_choices.get(theme, {}).get(script_type, "")
+    theme_emoji = theme_emojis.get(theme, "")
+    return positive_script, output_script, theme_emoji, theme  # Include theme as an output
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         with gr.Row():
             with gr.Column():
+                theme_emoji_output = gr.Label(label="Theme Emoji")
+                theme_dropdown = gr.Dropdown(
+                    label="1. Select a Theme", choices=list(script_choices.keys()))
+                script_text = gr.Textbox(
+                    label="2 & 3. Read the script below aloud THREE times for the best output:",
+                    lines=5,
                 )
+                script_type_dropdown = gr.Dropdown(
+                    label="4. Select the Script Type for Bot Output", choices=["Random", "Negative"])
+                output_script_text = gr.Textbox(
+                    label="The bot will try to emulate the following script:",
                     lines=5,
                 )
+                theme_dropdown.change(fn=update_script_text, inputs=[
+                                  theme_dropdown, script_type_dropdown], outputs=[script_text, output_script_text, theme_emoji_output])
+                script_type_dropdown.change(fn=update_script_text, inputs=[
+                                            theme_dropdown, script_type_dropdown], outputs=[script_text, output_script_text, theme_emoji_output])
+                theme_dropdown.change(fn=update_script_text, inputs=[theme_dropdown, script_type_dropdown], outputs=[
+                                              script_text, output_script_text, theme_emoji_output])
                 # Replace file input with microphone input
                 micro_in = gr.Audio(
                                 hidden_audio_numpy], queue=False)
         submit_btn.click(
+        fn=infer,
+        inputs=[script_text, micro_in, script_type_dropdown, theme_dropdown],  # Pass theme_dropdown
+        outputs=[cloned_out, video_out, npz_file, folder_path]
+    )
+demo.queue(api_open=False, max_size=10).launch()