Spaces:
Runtime error
Runtime error
Yuekai Zhang
committed on
Commit
•
8a5ea55
1
Parent(s):
dde9123
fix bug
Browse files
- Dockerfile.origin +3 -1
- app.py +15 -2
Dockerfile.origin
CHANGED
@@ -4,10 +4,12 @@ ENV DEBIAN_FRONTEND=noninteractive
|
|
4 |
|
5 |
RUN apt-get update && apt-get install -y ffmpeg
|
6 |
|
7 |
-
COPY ./requirements-gradio.txt
|
8 |
|
9 |
RUN pip install --no-cache-dir --upgrade -r ./requirements-gradio.txt
|
10 |
|
|
|
|
|
11 |
RUN chmod -R 777 /workspace/*
|
12 |
|
13 |
CMD ["python", "app.py"]
|
|
|
4 |
|
5 |
RUN apt-get update && apt-get install -y ffmpeg
|
6 |
|
7 |
+
COPY ./requirements-gradio.txt ./
|
8 |
|
9 |
RUN pip install --no-cache-dir --upgrade -r ./requirements-gradio.txt
|
10 |
|
11 |
+
COPY app.py ./
|
12 |
+
|
13 |
RUN chmod -R 777 /workspace/*
|
14 |
|
15 |
CMD ["python", "app.py"]
|
app.py
CHANGED
@@ -16,6 +16,7 @@ def convert_to_wav(in_filename: str) -> str:
|
|
16 |
else:
|
17 |
_ = os.system(f"ffmpeg -hide_banner -y -i '{in_filename}' -ar 16000 '{out_filename}'")
|
18 |
speech, _ = soundfile.read(out_filename)
|
|
|
19 |
return speech
|
20 |
|
21 |
def file_transcribe(microphone, file_upload):
|
@@ -33,7 +34,13 @@ def file_transcribe(microphone, file_upload):
|
|
33 |
|
34 |
speech = convert_to_wav(file)
|
35 |
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
return warn_output + text
|
39 |
|
@@ -55,7 +62,13 @@ def youtube_transcribe(yt_url):
|
|
55 |
stream.download(filename=filename)
|
56 |
|
57 |
speech=convert_to_wav(filename)
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
os.system(f"rm -rf audio.mp3 audio.mp3.wav")
|
60 |
return html_embed_str, text
|
61 |
|
|
|
16 |
else:
|
17 |
_ = os.system(f"ffmpeg -hide_banner -y -i '{in_filename}' -ar 16000 '{out_filename}'")
|
18 |
speech, _ = soundfile.read(out_filename)
|
19 |
+
print(f"load speech shape {speech.shape}")
|
20 |
return speech
|
21 |
|
22 |
def file_transcribe(microphone, file_upload):
|
|
|
34 |
|
35 |
speech = convert_to_wav(file)
|
36 |
|
37 |
+
items = []
|
38 |
+
vad_model.vad_scorer.AllResetDetection()
|
39 |
+
for item in transcribe(speech, asr_model, vad_model, punc_model):
|
40 |
+
items.append(item)
|
41 |
+
print(item)
|
42 |
+
|
43 |
+
text = "\n".join(items)
|
44 |
|
45 |
return warn_output + text
|
46 |
|
|
|
62 |
stream.download(filename=filename)
|
63 |
|
64 |
speech=convert_to_wav(filename)
|
65 |
+
items = []
|
66 |
+
vad_model.vad_scorer.AllResetDetection()
|
67 |
+
for item in transcribe(speech, asr_model, vad_model, punc_model):
|
68 |
+
items.append(item)
|
69 |
+
print(item)
|
70 |
+
|
71 |
+
text = "\n".join(items)
|
72 |
os.system(f"rm -rf audio.mp3 audio.mp3.wav")
|
73 |
return html_embed_str, text
|
74 |
|