Spaces:
Runtime error
Runtime error
guyyariv
commited on
Commit
•
44620f0
1
Parent(s):
04757be
AudioTokenDemo
Browse files- app.py +16 -2
- requirements.txt +1 -0
app.py
CHANGED
@@ -8,6 +8,7 @@ from diffusers.models.attention_processor import LoRAAttnProcessor
|
|
8 |
from diffusers import StableDiffusionPipeline
|
9 |
import numpy as np
|
10 |
import gradio as gr
|
|
|
11 |
|
12 |
|
13 |
class AudioTokenWrapper(torch.nn.Module):
|
@@ -90,10 +91,23 @@ class AudioTokenWrapper(torch.nn.Module):
|
|
90 |
|
91 |
|
92 |
def greet(audio):
|
93 |
-
audio = audio
|
|
|
|
|
|
|
94 |
if audio.ndim == 2:
|
95 |
audio = audio.sum(axis=1) / 2
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
weight_dtype = torch.float32
|
98 |
prompt = 'a photo of <*>'
|
99 |
|
@@ -143,6 +157,6 @@ if __name__ == "__main__":
|
|
143 |
outputs="image",
|
144 |
title='AudioToken',
|
145 |
description=description,
|
146 |
-
examples=examples
|
147 |
)
|
148 |
demo.launch()
|
|
|
8 |
from diffusers import StableDiffusionPipeline
|
9 |
import numpy as np
|
10 |
import gradio as gr
|
11 |
+
from scipy import signal
|
12 |
|
13 |
|
14 |
class AudioTokenWrapper(torch.nn.Module):
|
|
|
91 |
|
92 |
|
93 |
def greet(audio):
|
94 |
+
sample_rate, audio = audio
|
95 |
+
audio = audio.astype(np.float32, order='C') / 32768.0
|
96 |
+
desired_sample_rate = 16000
|
97 |
+
|
98 |
if audio.ndim == 2:
|
99 |
audio = audio.sum(axis=1) / 2
|
100 |
|
101 |
+
if sample_rate != desired_sample_rate:
|
102 |
+
# Calculate the resampling ratio
|
103 |
+
resample_ratio = desired_sample_rate / sample_rate
|
104 |
+
|
105 |
+
# Determine the new length of the audio data after downsampling
|
106 |
+
new_length = int(len(audio) * resample_ratio)
|
107 |
+
|
108 |
+
# Downsample the audio data using resample
|
109 |
+
audio = signal.resample(audio, new_length)
|
110 |
+
|
111 |
weight_dtype = torch.float32
|
112 |
prompt = 'a photo of <*>'
|
113 |
|
|
|
157 |
outputs="image",
|
158 |
title='AudioToken',
|
159 |
description=description,
|
160 |
+
# examples=examples
|
161 |
)
|
162 |
demo.launch()
|
requirements.txt
CHANGED
@@ -9,3 +9,4 @@ Pillow
|
|
9 |
pandas
|
10 |
torchaudio
|
11 |
datasets
|
|
|
|
9 |
pandas
|
10 |
torchaudio
|
11 |
datasets
|
12 |
+
scipy
|