Yuekai Zhang committed on
Commit
8a5ea55
1 Parent(s): dde9123
Files changed (2) hide show
  1. Dockerfile.origin +3 -1
  2. app.py +15 -2
Dockerfile.origin CHANGED
@@ -4,10 +4,12 @@ ENV DEBIAN_FRONTEND=noninteractive
4
 
5
  RUN apt-get update && apt-get install -y ffmpeg
6
 
7
- COPY ./requirements-gradio.txt app.py ./
8
 
9
  RUN pip install --no-cache-dir --upgrade -r ./requirements-gradio.txt
10
 
 
 
11
  RUN chmod -R 777 /workspace/*
12
 
13
  CMD ["python", "app.py"]
 
4
 
5
  RUN apt-get update && apt-get install -y ffmpeg
6
 
7
+ COPY ./requirements-gradio.txt ./
8
 
9
  RUN pip install --no-cache-dir --upgrade -r ./requirements-gradio.txt
10
 
11
+ COPY app.py ./
12
+
13
  RUN chmod -R 777 /workspace/*
14
 
15
  CMD ["python", "app.py"]
app.py CHANGED
@@ -16,6 +16,7 @@ def convert_to_wav(in_filename: str) -> str:
16
  else:
17
  _ = os.system(f"ffmpeg -hide_banner -y -i '{in_filename}' -ar 16000 '{out_filename}'")
18
  speech, _ = soundfile.read(out_filename)
 
19
  return speech
20
 
21
  def file_transcribe(microphone, file_upload):
@@ -33,7 +34,13 @@ def file_transcribe(microphone, file_upload):
33
 
34
  speech = convert_to_wav(file)
35
 
36
- text = "\n".join([item for item in transcribe(speech, asr_model, vad_model, punc_model)])
 
 
 
 
 
 
37
 
38
  return warn_output + text
39
 
@@ -55,7 +62,13 @@ def youtube_transcribe(yt_url):
55
  stream.download(filename=filename)
56
 
57
  speech=convert_to_wav(filename)
58
- text = "\n".join([item for item in transcribe(speech, asr_model, vad_model, punc_model)])
 
 
 
 
 
 
59
  os.system(f"rm -rf audio.mp3 audio.mp3.wav")
60
  return html_embed_str, text
61
 
 
16
  else:
17
  _ = os.system(f"ffmpeg -hide_banner -y -i '{in_filename}' -ar 16000 '{out_filename}'")
18
  speech, _ = soundfile.read(out_filename)
19
+ print(f"load speech shape {speech.shape}")
20
  return speech
21
 
22
  def file_transcribe(microphone, file_upload):
 
34
 
35
  speech = convert_to_wav(file)
36
 
37
+ items = []
38
+ vad_model.vad_scorer.AllResetDetection()
39
+ for item in transcribe(speech, asr_model, vad_model, punc_model):
40
+ items.append(item)
41
+ print(item)
42
+
43
+ text = "\n".join(items)
44
 
45
  return warn_output + text
46
 
 
62
  stream.download(filename=filename)
63
 
64
  speech=convert_to_wav(filename)
65
+ items = []
66
+ vad_model.vad_scorer.AllResetDetection()
67
+ for item in transcribe(speech, asr_model, vad_model, punc_model):
68
+ items.append(item)
69
+ print(item)
70
+
71
+ text = "\n".join(items)
72
  os.system(f"rm -rf audio.mp3 audio.mp3.wav")
73
  return html_embed_str, text
74