Spaces:
Running
Running
jason-on-salt-a40
committed on
Commit
•
3075679
1
Parent(s):
9227743
updated libs for easier installation, cannot upload audio, recorded audio doesn't work
Browse files- README.md +5 -0
- app.py +3 -3
- data/__pycache__/__init__.cpython-310.pyc +0 -0
- data/__pycache__/tokenizer.cpython-310.pyc +0 -0
- models/__pycache__/codebooks_patterns.cpython-310.pyc +0 -0
- models/__pycache__/voicecraft.cpython-310.pyc +0 -0
- models/modules/__pycache__/__init__.cpython-310.pyc +0 -0
- models/modules/__pycache__/activation.cpython-310.pyc +0 -0
- models/modules/__pycache__/embedding.cpython-310.pyc +0 -0
- models/modules/__pycache__/scaling.cpython-310.pyc +0 -0
- models/modules/__pycache__/transformer.cpython-310.pyc +0 -0
- models/modules/__pycache__/utils.cpython-310.pyc +0 -0
- pretrained_models/giga830M.pth +3 -0
- requirements.txt +2 -5
README.md
CHANGED
@@ -11,3 +11,8 @@ license: cc-by-nc-sa-4.0
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
14 |
+
|
15 |
+
```bash
|
16 |
+
conda create -n voicecraft_gradio python=3.10.13
|
17 |
+
pip install -r requirements.txt
|
18 |
+
```
|
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
3 |
-
# os.environ["CUDA_VISIBLE_DEVICES"] = "
|
4 |
import gradio as gr
|
5 |
import torch
|
6 |
import torchaudio
|
@@ -381,7 +381,7 @@ with gr.Blocks() as app:
|
|
381 |
|
382 |
with gr.Row():
|
383 |
with gr.Column(scale=2):
|
384 |
-
input_audio = gr.Audio(value="./demo/84_121550_000074_000000.wav", label="Input Audio", type="filepath")
|
385 |
with gr.Group():
|
386 |
original_transcript = gr.Textbox(label="Original transcript", lines=5, value=demo_original_transcript, interactive=False,
|
387 |
info="Use whisper model to get the transcript. Fix it if necessary.")
|
@@ -469,7 +469,7 @@ with gr.Blocks() as app:
|
|
469 |
inputs=[whisper_model_choice, voicecraft_model_choice],
|
470 |
outputs=[models_selector])
|
471 |
|
472 |
-
input_audio.
|
473 |
inputs=[input_audio],
|
474 |
outputs=[prompt_end_time, edit_start_time, edit_end_time])
|
475 |
transcribe_btn.click(fn=transcribe,
|
|
|
1 |
import os
|
2 |
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
3 |
+
# os.environ["CUDA_VISIBLE_DEVICES"] = "1" # these are only used if developing locally
|
4 |
import gradio as gr
|
5 |
import torch
|
6 |
import torchaudio
|
|
|
381 |
|
382 |
with gr.Row():
|
383 |
with gr.Column(scale=2):
|
384 |
+
input_audio = gr.Audio(sources=["upload", "microphone"], value="./demo/84_121550_000074_000000.wav", label="Input Audio", type="filepath", interactive=True)
|
385 |
with gr.Group():
|
386 |
original_transcript = gr.Textbox(label="Original transcript", lines=5, value=demo_original_transcript, interactive=False,
|
387 |
info="Use whisper model to get the transcript. Fix it if necessary.")
|
|
|
469 |
inputs=[whisper_model_choice, voicecraft_model_choice],
|
470 |
outputs=[models_selector])
|
471 |
|
472 |
+
input_audio.change(fn=update_input_audio,
|
473 |
inputs=[input_audio],
|
474 |
outputs=[prompt_end_time, edit_start_time, edit_end_time])
|
475 |
transcribe_btn.click(fn=transcribe,
|
data/__pycache__/__init__.cpython-310.pyc
CHANGED
Binary files a/data/__pycache__/__init__.cpython-310.pyc and b/data/__pycache__/__init__.cpython-310.pyc differ
|
|
data/__pycache__/tokenizer.cpython-310.pyc
CHANGED
Binary files a/data/__pycache__/tokenizer.cpython-310.pyc and b/data/__pycache__/tokenizer.cpython-310.pyc differ
|
|
models/__pycache__/codebooks_patterns.cpython-310.pyc
CHANGED
Binary files a/models/__pycache__/codebooks_patterns.cpython-310.pyc and b/models/__pycache__/codebooks_patterns.cpython-310.pyc differ
|
|
models/__pycache__/voicecraft.cpython-310.pyc
CHANGED
Binary files a/models/__pycache__/voicecraft.cpython-310.pyc and b/models/__pycache__/voicecraft.cpython-310.pyc differ
|
|
models/modules/__pycache__/__init__.cpython-310.pyc
CHANGED
Binary files a/models/modules/__pycache__/__init__.cpython-310.pyc and b/models/modules/__pycache__/__init__.cpython-310.pyc differ
|
|
models/modules/__pycache__/activation.cpython-310.pyc
CHANGED
Binary files a/models/modules/__pycache__/activation.cpython-310.pyc and b/models/modules/__pycache__/activation.cpython-310.pyc differ
|
|
models/modules/__pycache__/embedding.cpython-310.pyc
CHANGED
Binary files a/models/modules/__pycache__/embedding.cpython-310.pyc and b/models/modules/__pycache__/embedding.cpython-310.pyc differ
|
|
models/modules/__pycache__/scaling.cpython-310.pyc
CHANGED
Binary files a/models/modules/__pycache__/scaling.cpython-310.pyc and b/models/modules/__pycache__/scaling.cpython-310.pyc differ
|
|
models/modules/__pycache__/transformer.cpython-310.pyc
CHANGED
Binary files a/models/modules/__pycache__/transformer.cpython-310.pyc and b/models/modules/__pycache__/transformer.cpython-310.pyc differ
|
|
models/modules/__pycache__/utils.cpython-310.pyc
CHANGED
Binary files a/models/modules/__pycache__/utils.cpython-310.pyc and b/models/modules/__pycache__/utils.cpython-310.pyc differ
|
|
pretrained_models/giga830M.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2454b51575822a04d24a00f8ba78f201f916439ffa62a3c1ac0ffa5220f429e3
|
3 |
+
size 3358342977
|
requirements.txt
CHANGED
@@ -1,9 +1,6 @@
|
|
1 |
-
-e git+https://github.com/facebookresearch/audiocraft.git@
|
2 |
-
xformers==0.0.22
|
3 |
-
torchaudio==2.0.2
|
4 |
-
torch==2.0.1
|
5 |
phonemizer==3.2.1
|
6 |
-
gradio
|
7 |
nltk>=3.8.1
|
8 |
openai-whisper>=20231117
|
9 |
spaces
|
|
|
1 |
+
-e git+https://github.com/facebookresearch/audiocraft.git@f83babff6b5e97f75562127c4cc8122229c8f099#egg=audiocraft
|
|
|
|
|
|
|
2 |
phonemizer==3.2.1
|
3 |
+
gradio
|
4 |
nltk>=3.8.1
|
5 |
openai-whisper>=20231117
|
6 |
spaces
|