devilent2 committed on
Commit
dde0a2b
1 Parent(s): 00f1499

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -68
app.py CHANGED
@@ -10,6 +10,8 @@ from transformers.pipelines.audio_utils import ffmpeg_read
10
  DEFAULT_MODEL_NAME = "distil-whisper/distil-large-v3"
11
  BATCH_SIZE = 8
12
 
 
 
13
  device = 0 if torch.cuda.is_available() else "cpu"
14
  if device == "cpu":
15
  DEFAULT_MODEL_NAME = "openai/whisper-tiny"
@@ -23,42 +25,17 @@ def load_pipeline(model_name):
23
  )
24
 
25
  pipe = load_pipeline(DEFAULT_MODEL_NAME)
 
 
26
 
27
- @spaces.GPU
28
- def transcribe(inputs, task, model_name):
29
- if inputs is None:
30
- raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
31
-
32
- global pipe
33
- if model_name != pipe.model.name_or_path:
34
- pipe = load_pipeline(model_name)
35
-
36
- start_time = time.time() # Record the start time
37
-
38
- # Load the audio file and calculate its duration
39
- audio = mp.AudioFileClip(inputs)
40
- audio_duration = audio.duration
41
-
42
- text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
43
- end_time = time.time() # Record the end time
44
-
45
- transcription_time = end_time - start_time # Calculate the transcription time
46
-
47
- # Create the transcription time output with additional information
48
- transcription_time_output = (
49
- f"Transcription Time: {transcription_time:.2f} seconds\n"
50
- f"Audio Duration: {audio_duration:.2f} seconds\n"
51
- f"Model Used: {model_name}\n"
52
- f"Device Used: {'GPU' if torch.cuda.is_available() else 'CPU'}"
53
- )
54
 
55
- return text, transcription_time_output
56
 
57
  from gpustat import GPUStatCollection
58
 
59
  def update_gpu_status():
60
  if torch.cuda.is_available() == False:
61
- return "No Nviadia Device"
62
  try:
63
  gpu_stats = GPUStatCollection.new_query()
64
  for gpu in gpu_stats:
@@ -81,9 +58,10 @@ def torch_update_gpu_status():
81
  gpu_info = torch.cuda.get_device_name(0)
82
  gpu_memory = torch.cuda.mem_get_info(0)
83
  total_memory = gpu_memory[1] / (1024 * 1024)
 
84
  used_memory = (gpu_memory[1] - gpu_memory[0]) / (1024 * 1024)
85
 
86
- gpu_status = f"GPU: {gpu_info}\nTotal Memory: {total_memory:.2f} MB\nUsed Memory: {used_memory:.2f} MB"
87
  else:
88
  gpu_status = "No GPU available"
89
  return gpu_status
@@ -102,70 +80,117 @@ def update_cpu_status():
102
  def update_status():
103
  gpu_status = update_gpu_status()
104
  cpu_status = update_cpu_status()
105
- return gpu_status, cpu_status
 
106
 
107
  def refresh_status():
108
  return update_status()
109
 
110
- demo = gr.Blocks()
111
 
112
- mf_transcribe = gr.Interface(
113
- fn=transcribe,
114
- inputs=[
115
- gr.Audio(type="filepath"),
116
- gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
117
- gr.Textbox(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  label="Model Name",
119
  value=DEFAULT_MODEL_NAME,
120
  placeholder="Enter the model name",
121
- info="Some available models: distil-whisper/distil-large-v3 distil-whisper/distil-medium.en Systran/faster-distil-whisper-large-v3 Systran/faster-whisper-large-v3 Systran/faster-whisper-medium openai/whisper-tiny, openai/whisper-base, openai/whisper-medium, openai/whisper-large-v3",
122
- ),
123
- ],
124
- outputs=[gr.TextArea(label="Transcription"), gr.TextArea(label="Transcription Info")],
125
- theme="huggingface",
126
- title="Whisper Transcription",
127
- description=(
128
- "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the specified OpenAI Whisper"
129
- " checkpoint and 🤗 Transformers to transcribe audio files of arbitrary length."
130
- ),
131
- allow_flagging="never",
132
- )
133
 
134
- file_transcribe = gr.Interface(
135
- fn=transcribe,
136
  inputs=[
137
- gr.Audio(type="filepath", label="Audio file"),
138
- gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
139
- gr.Textbox(
140
- label="Model Name",
141
- value=DEFAULT_MODEL_NAME,
142
- placeholder="Enter the model name",
143
- info="Some available models: openai/whisper-tiny, openai/whisper-base, openai/whisper-medium, openai/whisper-large-v2",
144
- ),
145
  ],
146
- outputs=[gr.TextArea(label="Transcription"), gr.TextArea(label="Transcription Info")],
147
  theme="huggingface",
148
  title="Whisper Transcription",
149
  description=(
150
- "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the specified OpenAI Whisper"
151
- " checkpoint and 🤗 Transformers to transcribe audio files of arbitrary length."
152
  ),
153
  allow_flagging="never",
154
  )
 
 
 
 
 
155
  with demo:
156
- gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
157
 
158
  with gr.Row():
159
  refresh_button = gr.Button("Refresh Status") # Create a refresh button
160
 
161
- gpu_status_output = gr.Textbox(label="GPU Status", interactive=False)
162
- cpu_status_output = gr.Textbox(label="CPU Status", interactive=False)
163
 
164
  # Link the refresh button to the refresh_status function
165
- refresh_button.click(refresh_status, None, [gpu_status_output, cpu_status_output])
166
 
167
  # Load the initial status using update_status function
168
- demo.load(update_status, inputs=None, outputs=[gpu_status_output, cpu_status_output], every=2, queue=False)
 
 
 
 
169
 
170
  # Launch the Gradio app
171
  demo.launch(share=True)
 
 
 
10
  DEFAULT_MODEL_NAME = "distil-whisper/distil-large-v3"
11
  BATCH_SIZE = 8
12
 
13
+ print('start app')
14
+
15
  device = 0 if torch.cuda.is_available() else "cpu"
16
  if device == "cpu":
17
  DEFAULT_MODEL_NAME = "openai/whisper-tiny"
 
25
  )
26
 
27
  pipe = load_pipeline(DEFAULT_MODEL_NAME)
28
+ openai_pipe=load_pipeline("openai/whisper-large-v3")
29
+ default_pipe = load_pipeline(DEFAULT_MODEL_NAME)
30
 
31
+ #pipe = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
 
33
 
34
  from gpustat import GPUStatCollection
35
 
36
  def update_gpu_status():
37
  if torch.cuda.is_available() == False:
38
+ return "No Nvidia Device"
39
  try:
40
  gpu_stats = GPUStatCollection.new_query()
41
  for gpu in gpu_stats:
 
58
  gpu_info = torch.cuda.get_device_name(0)
59
  gpu_memory = torch.cuda.mem_get_info(0)
60
  total_memory = gpu_memory[1] / (1024 * 1024)
61
+ free_memory=gpu_memory[0] /(1024 *1024)
62
  used_memory = (gpu_memory[1] - gpu_memory[0]) / (1024 * 1024)
63
 
64
+ gpu_status = f"GPU: {gpu_info} Free Memory:{free_memory}MB Total Memory: {total_memory:.2f} MB Used Memory: {used_memory:.2f} MB"
65
  else:
66
  gpu_status = "No GPU available"
67
  return gpu_status
 
80
  def update_status():
81
  gpu_status = update_gpu_status()
82
  cpu_status = update_cpu_status()
83
+ sys_status=gpu_status+"\n\n"+cpu_status
84
+ return sys_status
85
 
86
  def refresh_status():
87
  return update_status()
88
 
 
89
 
90
@spaces.GPU
def transcribe(audio_path, model_name):
    """Transcribe an audio file with the requested speech-recognition model.

    The module-level ``pipe`` is swapped to match ``model_name``: the two
    preloaded pipelines (``openai_pipe`` and ``default_pipe``) are reused when
    they match, otherwise a new pipeline is loaded on demand.

    Args:
        audio_path: Path of the audio file to transcribe. ``None`` means
            nothing was uploaded or recorded.
        model_name: Model identifier to use. Surrounding whitespace is
            ignored; a blank or ``None`` value falls back to
            ``DEFAULT_MODEL_NAME``.

    Returns:
        A ``(text, info)`` tuple: the transcription text and a multi-line
        summary with elapsed time, audio duration, model and device.

    Raises:
        gr.Error: If ``audio_path`` is ``None``.
    """
    print(f"{time.time()} start transcribe ")

    if audio_path is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    audio_path = audio_path.strip()
    # A cleared model textbox would otherwise crash in .strip() or in
    # load_pipeline(""); use the default model instead.
    model_name = (model_name or "").strip() or DEFAULT_MODEL_NAME

    global pipe
    if model_name != pipe.model.name_or_path:
        print(f"old model is:{pipe.model.name_or_path}")
        if model_name == "openai/whisper-large-v3":
            pipe = openai_pipe
            print(f"{time.time()} use openai model {pipe.model.name_or_path}")
        elif model_name == DEFAULT_MODEL_NAME:
            pipe = default_pipe
            print(f"{time.time()} use default model {pipe.model.name_or_path}")
        else:
            print(f"{time.time()} start load model {model_name}")
            pipe = load_pipeline(model_name)
            print(f"{time.time()} finished load model {model_name}")

    start_time = time.time()  # Record the start time
    print(f"{time.time()} start processing and set recording start time point")

    # Open the clip only to read its duration, and always close it so the
    # underlying reader is released on every request.
    audio = mp.AudioFileClip(audio_path)
    try:
        audio_duration = audio.duration
    finally:
        audio.close()

    print(f"{time.time()} start pipe ")
    text = pipe(
        audio_path,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": "transcribe"},
        return_timestamps=True,
    )["text"]
    end_time = time.time()  # Record the end time

    transcription_time = end_time - start_time

    # Summary shown next to the transcription.
    transcription_time_output = (
        f"Transcription Time: {transcription_time:.2f} seconds\n"
        f"Audio Duration: {audio_duration:.2f} seconds\n"
        f"Model Used: {model_name}\n"
        f"Device Used: {'GPU' if torch.cuda.is_available() else 'CPU'}"
    )

    print(f"{time.time()} return transcribe {text}")

    return text, transcription_time_output
136
+
137
@spaces.GPU
def handle_upload_audio(audio_path,model_name,old_transcription=''):
    """Transcribe new audio and put the result above the earlier transcription.

    Args:
        audio_path: Path of the uploaded or recorded audio file.
        model_name: Model identifier forwarded to ``transcribe``.
        old_transcription: Text already in the transcription box. ``None``
            is treated as an empty string.

    Returns:
        A ``(text, info)`` tuple: the new transcription followed by a blank
        line and the previous text, plus the info string from ``transcribe``.
    """
    # Guard against a None textbox value, which would raise TypeError in the
    # string concatenations below.
    if old_transcription is None:
        old_transcription = ''
    print('old_trans:' + old_transcription)
    text, transcription_time_output = transcribe(audio_path, model_name)
    return text + '\n\n' + old_transcription, transcription_time_output
142
+
143
+ graudio=gr.Audio(type="filepath",show_download_button=True)
144
+ grmodel_textbox=gr.Textbox(
145
  label="Model Name",
146
  value=DEFAULT_MODEL_NAME,
147
  placeholder="Enter the model name",
148
+ info="Some available models: distil-whisper/distil-large-v3 distil-whisper/distil-medium.en Systran/faster-distil-whisper-large-v3 Systran/faster-whisper-large-v3 Systran/faster-whisper-medium openai/whisper-tiny, openai/whisper-base, openai/whisper-medium, openai/whisper-large-v3",
149
+ )
150
+ groutputs=[gr.TextArea(label="Transcription",elem_id="transcription_textarea",interactive=True,lines=20,show_copy_button=True),
151
+ gr.TextArea(label="Transcription Info",interactive=True,show_copy_button=True)]
 
 
 
 
 
 
 
 
152
 
153
+ mf_transcribe = gr.Interface(
154
+ fn=handle_upload_audio,
155
  inputs=[
156
+ graudio, #"numpy" or filepath
157
+ #gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
158
+ grmodel_textbox,
 
 
 
 
 
159
  ],
160
+ outputs=groutputs,
161
  theme="huggingface",
162
  title="Whisper Transcription",
163
  description=(
164
+ "Scroll to Bottom to show system status. "
165
+ "Transcribe long-form microphone or audio file after uploaded audio! "
166
  ),
167
  allow_flagging="never",
168
  )
169
+
170
+
171
+ demo = gr.Blocks()
172
+
173
+
174
  with demo:
175
+ gr.TabbedInterface([mf_transcribe, ], ["Audio",])
176
 
177
  with gr.Row():
178
  refresh_button = gr.Button("Refresh Status") # Create a refresh button
179
 
180
+ sys_status_output = gr.Textbox(label="System Status", interactive=False)
181
+
182
 
183
  # Link the refresh button to the refresh_status function
184
+ refresh_button.click(refresh_status, None, [sys_status_output])
185
 
186
  # Load the initial status using update_status function
187
+ demo.load(update_status, inputs=None, outputs=[sys_status_output], every=2, queue=False)
188
+
189
+ graudio.stop_recording(handle_upload_audio,inputs=[graudio,grmodel_textbox,groutputs[0]],outputs=groutputs)
190
+ graudio.upload(handle_upload_audio,inputs=[graudio,grmodel_textbox,groutputs[0]],outputs=groutputs)
191
+
192
 
193
  # Launch the Gradio app
194
  demo.launch(share=True)
195
+
196
+ print('launched\n\n')