tts-hallo-talking-portrait

Running on A10G

fffiloni committed on Jun 26

Commit

62e5071

•

1 Parent(s): dcda854

fix typo + add IDs to generated files

Files changed (1) hide show

app.py CHANGED Viewed

@@ -70,11 +70,12 @@ def add_silence_to_wav(wav_file_path, duration_s=1):
 def check_mp3(file_path):
     if is_mp3(file_path):
-        wav_file_path = os.path.splitext(file_path)[0] + '.wav'
         converted_audio = convert_mp3_to_wav(file_path, wav_file_path)
         print(f"File converted to {wav_file_path}")
-        return converted_audio
     else:
         print("The file is not an MP3 file.")
@@ -112,17 +113,17 @@ def generate_portrait(prompt_image):
     except:
         raise gr.Error(f"ByteDance/SDXL-Lightning space's api might not be ready, please wait, or upload an image instead.")
-    try:
-        result = client.predict(
-            prompt = prompt_image,
-            ckpt = "4-Step",
-            api_name = "/generate_image"
-        )
-        print(result)
     # convert to png if necessary
     input_file = result
-    output_file = "converted_to_png_portrait.png"
     ready_png = check_and_convert_webp_to_png(input_file, output_file)
     print(f"PORTRAIT PNG FILE: {ready_png}")
@@ -201,7 +202,8 @@ def generate_talking_portrait(portrait, voice, progress=gr.Progress(track_tqdm=T
     if is_shared_ui :
         # Trim audio to AUDIO_MAX_DURATION for better shared experience with community
         input_file = voice
-        trimmed_output_file = "trimmed_audio.wav"
         trimmed_output_file = trim_audio(input_file, trimmed_output_file, AUDIO_MAX_DURATION)
         voice = trimmed_output_file

 def check_mp3(file_path):
     if is_mp3(file_path):
+        unique_id = uuid.uuid4()
+        wav_file_path = f"{os.path.splitext(file_path)[0]}-{unique_id}.wav"
         converted_audio = convert_mp3_to_wav(file_path, wav_file_path)
         print(f"File converted to {wav_file_path}")
+        return converted_audio, gr.update(value=converted_audio, visible=True)
     else:
         print("The file is not an MP3 file.")
     except:
         raise gr.Error(f"ByteDance/SDXL-Lightning space's api might not be ready, please wait, or upload an image instead.")
+    result = client.predict(
+        prompt = prompt_image,
+        ckpt = "4-Step",
+        api_name = "/generate_image"
+    )
+    print(result)
     # convert to png if necessary
     input_file = result
+    unique_id = uuid.uuid4()
+    output_file = f"converted_to_png_portrait-{unique_id}.png"
     ready_png = check_and_convert_webp_to_png(input_file, output_file)
     print(f"PORTRAIT PNG FILE: {ready_png}")
     if is_shared_ui :
         # Trim audio to AUDIO_MAX_DURATION for better shared experience with community
         input_file = voice
+        unique_id = uuid.uuid4()
+        trimmed_output_file = f"-{unique_id}.wav"
         trimmed_output_file = trim_audio(input_file, trimmed_output_file, AUDIO_MAX_DURATION)
         voice = trimmed_output_file