Spaces:
Paused
Paused
nithinraok
committed on
Commit
•
50262ab
1
Parent(s):
ede25a6
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from nemo.collections.asr.models import
|
2 |
import yt_dlp as youtube_dl
|
3 |
import os
|
4 |
import tempfile
|
@@ -7,10 +7,10 @@ import gradio as gr
|
|
7 |
from pydub import AudioSegment
|
8 |
|
9 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
10 |
-
MODEL_NAME="nvidia/parakeet-
|
11 |
YT_LENGTH_LIMIT_S=3600
|
12 |
|
13 |
-
model =
|
14 |
model.eval()
|
15 |
|
16 |
def get_transcripts(audio_path):
|
@@ -19,8 +19,8 @@ def get_transcripts(audio_path):
|
|
19 |
|
20 |
article = (
|
21 |
"<p style='text-align: center'>"
|
22 |
-
"<a href='https://huggingface.co/nvidia/parakeet-
|
23 |
-
"<a href='https://arxiv.org/abs/
|
24 |
"<a href='https://github.com/NVIDIA/NeMo' target='_blank'>🧑‍💻 Repository</a>"
|
25 |
"</p>"
|
26 |
)
|
@@ -92,11 +92,11 @@ mf_transcribe = gr.Interface(
|
|
92 |
],
|
93 |
outputs="text",
|
94 |
theme="huggingface",
|
95 |
-
title="Parakeet
|
96 |
description=(
|
97 |
"Transcribe microphone or audio inputs with the click of a button! Demo uses the"
|
98 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
|
99 |
-
" of arbitrary length."
|
100 |
),
|
101 |
allow_flagging="never",
|
102 |
)
|
@@ -108,11 +108,11 @@ file_transcribe = gr.Interface(
|
|
108 |
],
|
109 |
outputs="text",
|
110 |
theme="huggingface",
|
111 |
-
title="Parakeet
|
112 |
description=(
|
113 |
"Transcribe microphone or audio inputs with the click of a button! Demo uses the"
|
114 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
|
115 |
-
" of arbitrary length."
|
116 |
),
|
117 |
allow_flagging="never",
|
118 |
)
|
@@ -124,11 +124,11 @@ youtube_transcribe = gr.Interface(
|
|
124 |
],
|
125 |
outputs=["html", "text"],
|
126 |
theme="huggingface",
|
127 |
-
title="Parakeet
|
128 |
description=(
|
129 |
"Transcribe microphone or audio inputs with the click of a button! Demo uses the"
|
130 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
|
131 |
-
" of arbitrary length."
|
132 |
),
|
133 |
allow_flagging="never",
|
134 |
)
|
|
|
1 |
+
from nemo.collections.asr.models import ASRModel
|
2 |
import yt_dlp as youtube_dl
|
3 |
import os
|
4 |
import tempfile
|
|
|
7 |
from pydub import AudioSegment
|
8 |
|
9 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
10 |
+
MODEL_NAME="nvidia/parakeet-tdt-1.1b"
|
11 |
YT_LENGTH_LIMIT_S=3600
|
12 |
|
13 |
+
model = ASRModel.from_pretrained(model_name=MODEL_NAME).to(device)
|
14 |
model.eval()
|
15 |
|
16 |
def get_transcripts(audio_path):
|
|
|
19 |
|
20 |
article = (
|
21 |
"<p style='text-align: center'>"
|
22 |
+
"<a href='https://huggingface.co/nvidia/parakeet-tdt-1.1b' target='_blank'>🎙️ Learn more about Parakeet TDT model</a> | "
|
23 |
+
"<a href='https://arxiv.org/abs/2304.06795' target='_blank'>📚 TDT ICML paper</a> | "
|
24 |
"<a href='https://github.com/NVIDIA/NeMo' target='_blank'>🧑‍💻 Repository</a>"
|
25 |
"</p>"
|
26 |
)
|
|
|
92 |
],
|
93 |
outputs="text",
|
94 |
theme="huggingface",
|
95 |
+
title="Parakeet TDT 1.1B: Transcribe Audio",
|
96 |
description=(
|
97 |
"Transcribe microphone or audio inputs with the click of a button! Demo uses the"
|
98 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
|
99 |
+
" of arbitrary length. TDT models are 75% more efficient than similar size RNNT model"
|
100 |
),
|
101 |
allow_flagging="never",
|
102 |
)
|
|
|
108 |
],
|
109 |
outputs="text",
|
110 |
theme="huggingface",
|
111 |
+
title="Parakeet TDT 1.1B: Transcribe Audio",
|
112 |
description=(
|
113 |
"Transcribe microphone or audio inputs with the click of a button! Demo uses the"
|
114 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
|
115 |
+
" of arbitrary length. TDT models are 75% more efficient than similar size RNNT model"
|
116 |
),
|
117 |
allow_flagging="never",
|
118 |
)
|
|
|
124 |
],
|
125 |
outputs=["html", "text"],
|
126 |
theme="huggingface",
|
127 |
+
title="Parakeet TDT 1.1B: Transcribe Audio",
|
128 |
description=(
|
129 |
"Transcribe microphone or audio inputs with the click of a button! Demo uses the"
|
130 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) to transcribe audio files"
|
131 |
+
" of arbitrary length. TDT models are 75% more efficient than similar size RNNT model"
|
132 |
),
|
133 |
allow_flagging="never",
|
134 |
)
|