nshmyrevgmail committed on
Commit
196d65f
•
1 Parent(s): 71895bb

Initial version

Browse files
Files changed (4) hide show
  1. README.md +4 -4
  2. app.py +116 -0
  3. packages.txt +1 -0
  4. requirements.txt +1 -0
README.md CHANGED
@@ -1,12 +1,12 @@
1
  ---
2
- title: Asr
3
  emoji: 🌍
4
- colorFrom: yellow
5
- colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 3.0.26
8
  app_file: app.py
9
- pinned: false
10
  license: apache-2.0
11
  ---
12
 
 
1
  ---
2
+ title: Automatic Speech Recognition
3
  emoji: 🌍
4
+ colorFrom: magenta
5
+ colorTo: magenta
6
  sdk: gradio
7
  sdk_version: 3.0.26
8
  app_file: app.py
9
+ pinned: true
10
  license: apache-2.0
11
  ---
12
 
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import sys
3
+ import gradio as gr
4
+ import vosk
5
+ import json
6
+ import subprocess
7
+
8
# Route every log record to stdout using a timestamped, levelled format.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    datefmt="%m/%d/%Y %H:%M:%S",
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Maps the language label offered in the UI to the Vosk model used for it.
LARGE_MODEL_BY_LANGUAGE = {
    language: {"model_id": model_id}
    for language, model_id in (
        ("Russian", "vosk-model-ru-0.22"),
        ("Chinese", "vosk-model-cn-0.22"),
        ("English", "vosk-model-en-us-0.22"),
        ("French", "vosk-model-fr-0.22"),
        ("German", "vosk-model-de-0.22"),
        ("Italian", "vosk-model-it-0.22"),
        ("Japanese", "vosk-model-ja-0.22"),
        ("Persian", "vosk-model-fa-0.5"),
    )
}

# Alphabetically ordered language labels.
LANGUAGES = sorted(LARGE_MODEL_BY_LANGUAGE)

# Model instances that have already been loaded, keyed by model id.
CACHED_MODELS_BY_ID = {}
29
+
30
def asr(model, input_file, sample_rate=16000):
    """Transcribe an audio file with a Vosk model.

    The file is decoded by an ``ffmpeg`` subprocess into raw mono signed
    16-bit little-endian PCM at ``sample_rate`` Hz, which is streamed to a
    ``vosk.KaldiRecognizer`` in 4000-byte chunks.

    Args:
        model: Vosk model instance handed to ``vosk.KaldiRecognizer``.
        input_file: Path of the audio file to decode.
        sample_rate: Sample rate in Hz used both for the ffmpeg output and
            for the recognizer. Defaults to 16000.

    Returns:
        The recognized text segments joined with single spaces. Empty
        segments are skipped, so the result is ``""`` when nothing was
        recognized.
    """
    rec = vosk.KaldiRecognizer(model, float(sample_rate))
    results = []

    # Pass the arguments as a list: splitting a formatted string on
    # whitespace breaks as soon as the file path contains a space.
    command = [
        "ffmpeg",
        "-loglevel", "quiet",
        "-i", str(input_file),
        "-ar", str(int(sample_rate)),
        "-ac", "1",
        "-f", "s16le",
        "-",
    ]

    # The context manager closes the pipe and waits for ffmpeg to exit, so
    # no zombie process or open file descriptor is left behind.
    with subprocess.Popen(command, stdout=subprocess.PIPE) as process:
        for data in iter(lambda: process.stdout.read(4000), b""):
            if rec.AcceptWaveform(data):
                results.append(json.loads(rec.Result())["text"])

    if process.returncode != 0:
        # ffmpeg runs with "-loglevel quiet", so this is the only trace of
        # a decoding failure; the transcription is still returned as-is.
        logger.warning(
            "ffmpeg exited with code %s for %s", process.returncode, input_file
        )

    results.append(json.loads(rec.FinalResult())["text"])

    # Drop empty segments so the joined text has no stray spaces.
    return " ".join(text for text in results if text)
50
+
51
+
52
def _render_history(history):
    """Render the history items as the HTML shown in the output panel."""
    from html import escape

    parts = ["<div class='result'>"]
    for item in history:
        if item["error_message"] is not None:
            parts.append(
                "<div class='result_item result_item_error'>"
                f"{escape(item['error_message'])}</div>"
            )
        else:
            parts.append("<div class='result_item result_item_success'>")
            parts.append(f"{escape(item['transcription'])}<br/>")
            parts.append("</div>")
    parts.append("</div>")
    return "".join(parts)


def run(input_file, language, history):
    """Transcribe one recording and append the outcome to the history.

    Args:
        input_file: Uploaded/recorded audio object; its ``name`` attribute is
            used as the path of the file to transcribe. May be ``None`` when
            nothing was recorded.
        language: Language label; must be a key of
            ``LARGE_MODEL_BY_LANGUAGE`` for a transcription to happen.
        history: List of previous result dicts (the "state" value), or a
            falsy value when there is no history yet.

    Returns:
        A ``(html_output, history)`` tuple: the HTML rendering of every
        history item and the updated history list. Each item has an
        ``"error_message"`` key; successful items additionally carry
        ``"model_id"``, ``"language"`` and ``"transcription"``.
    """
    logger.info("Running ASR for %s for %s", language, input_file)

    # Work on a copy so the caller's list is never mutated in place.
    history = list(history or [])

    model = LARGE_MODEL_BY_LANGUAGE.get(language)

    if model is None:
        history.append({
            "error_message": f"Failed to find a model for {language} language :("
        })
    elif input_file is None:
        # Without this guard ``input_file.name`` raises AttributeError.
        history.append({
            "error_message": "No audio was provided, please record something first :("
        })
    else:
        model_id = model["model_id"]
        try:
            model_instance = CACHED_MODELS_BY_ID.get(model_id)
            if model_instance is None:
                model_instance = vosk.Model(model_name=model_id)
                CACHED_MODELS_BY_ID[model_id] = model_instance

            transcription = asr(model_instance, input_file.name)
        except Exception:
            # UI handler boundary: log the traceback and report the failure
            # through the same error item the page already knows how to show.
            logger.exception("ASR failed for %s (%s)", input_file, model_id)
            history.append({
                "error_message": f"Failed to transcribe the recording for {language} language :("
            })
        else:
            logger.info("Transcription for %s: %s", input_file, transcription)
            history.append({
                "model_id": model_id,
                "language": language,
                "transcription": transcription,
                "error_message": None,
            })

    return _render_history(history), history
92
+
93
+
94
# Build the web UI around `run` and start serving it with request queuing on.
# Inputs: a microphone recording (delivered as a file), the language choice,
# and the session state; outputs: the rendered HTML and the updated state.
gr.Interface(
    fn=run,
    title="Automatic Speech Recognition",
    description="",
    inputs=[
        gr.inputs.Audio(source="microphone", type="file", label="Record something..."),
        gr.inputs.Radio(label="Language", choices=LANGUAGES),
        "state",
    ],
    outputs=[
        gr.outputs.HTML(label="Outputs"),
        "state",
    ],
    theme="grass",
    allow_screenshot=False,
    allow_flagging="never",
    css="""
.result {display:flex;flex-direction:column}
.result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
.result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
.result_item_error {background-color:#ff7070;color:white;align-self:start}
""",
).launch(enable_queue=True)
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ vosk