Spaces:

hackengine
/

Paraformer-for-Chinese-Podcast

Runtime error

Yuekai Zhang committed on Apr 9, 2023

Commit

8a5ea55

•

1 Parent(s): dde9123

fix bug

Files changed (2) hide show

Dockerfile.origin CHANGED Viewed

@@ -4,10 +4,12 @@ ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && apt-get install -y ffmpeg
-COPY ./requirements-gradio.txt app.py ./
 RUN pip install --no-cache-dir --upgrade -r ./requirements-gradio.txt
 RUN chmod -R 777 /workspace/*
 CMD ["python", "app.py"]

 RUN apt-get update && apt-get install -y ffmpeg
+COPY ./requirements-gradio.txt ./
 RUN pip install --no-cache-dir --upgrade -r ./requirements-gradio.txt
+COPY app.py ./
 RUN chmod -R 777 /workspace/*
 CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -16,6 +16,7 @@ def convert_to_wav(in_filename: str) -> str:
     else:
         _ = os.system(f"ffmpeg -hide_banner -y -i '{in_filename}' -ar 16000 '{out_filename}'")
     speech, _ = soundfile.read(out_filename)
     return speech
 def file_transcribe(microphone, file_upload):
@@ -33,7 +34,13 @@ def file_transcribe(microphone, file_upload):
     speech = convert_to_wav(file)
-    text = "\n".join([item for item in transcribe(speech, asr_model, vad_model, punc_model)])
     return warn_output + text
@@ -55,7 +62,13 @@ def youtube_transcribe(yt_url):
     stream.download(filename=filename)
     speech=convert_to_wav(filename)
-    text = "\n".join([item for item in transcribe(speech, asr_model, vad_model, punc_model)])
     os.system(f"rm -rf audio.mp3 audio.mp3.wav")
     return html_embed_str, text

     else:
         _ = os.system(f"ffmpeg -hide_banner -y -i '{in_filename}' -ar 16000 '{out_filename}'")
     speech, _ = soundfile.read(out_filename)
+    print(f"load speech shape {speech.shape}")
     return speech
 def file_transcribe(microphone, file_upload):
     speech = convert_to_wav(file)
+    items = []
+    vad_model.vad_scorer.AllResetDetection()
+    for item in transcribe(speech, asr_model, vad_model, punc_model):
+        items.append(item)
+        print(item)
+    text = "\n".join(items)
     return warn_output + text
     stream.download(filename=filename)
     speech=convert_to_wav(filename)
+    items = []
+    vad_model.vad_scorer.AllResetDetection()
+    for item in transcribe(speech, asr_model, vad_model, punc_model):
+        items.append(item)
+        print(item)
+    text = "\n".join(items)
     os.system(f"rm -rf audio.mp3 audio.mp3.wav")
     return html_embed_str, text