Nag189 committed on
Commit
00b0d1a
1 Parent(s): 4300cef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -110
app.py CHANGED
@@ -1,131 +1,68 @@
1
  import streamlit as st
2
  import time
3
  from datetime import datetime
4
- from transformers import SpeechT5Processor, SpeechT5ForSpeechToSpeech, SpeechT5HifiGan,SpeechT5ForTextToSpeech
5
  import numpy as np
6
  import torch
7
  from io import StringIO
8
  import soundfile as sf
9
 
 
 
 
 
10
 
11
- html_temp= """
12
- <div style="background-color:tomato;padding:10px">
13
- <h2 style="color:white;text-align:centre;"> Text-to-Speech </h2>
14
- </div>
15
- """
16
- st.markdown(html_temp,unsafe_allow_html=True)
17
 
18
- st.markdown(
19
- """
20
- Text to Audio Conversion.
21
- """
22
- )
23
- model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
24
- processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
25
- vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
26
 
27
- speaker_embeddings = np.load("cmu_us_slt_arctic-wav-arctic_a0508.npy")
28
- speaker_embeddings = torch.tensor(speaker_embeddings).unsqueeze(0)
 
 
 
 
 
29
 
30
- text = st.text_area("Type your text..")
31
- st.button("Convert")
32
- inputs = processor(text=text, return_tensors="pt")
33
- spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
34
- with torch.no_grad():
35
- speech = vocoder(spectrogram)
36
- sf.write("speech.wav", speech.numpy(), samplerate=16000)
37
-
38
- audio_file = open('speech.wav', 'rb')
39
- audio_bytes = audio_file.read()
40
- st.audio(audio_bytes, format='audio/wav')
41
 
 
 
 
 
 
42
 
43
- uploaded_file=st.file_uploader("Upload your text file here",type=['txt'] )
44
- if uploaded_file is not None:
45
- stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
46
- #To read file as string:
47
- text = stringio.read()
48
- st.write(text)
49
-
50
- st.button("Convert",key=1)
51
  inputs = processor(text=text, return_tensors="pt")
52
  spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
53
  with torch.no_grad():
54
  speech = vocoder(spectrogram)
55
  sf.write("speech.wav", speech.numpy(), samplerate=16000)
56
- audio_file = open('speech.wav', 'rb')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  audio_bytes = audio_file.read()
58
  st.audio(audio_bytes, format='audio/wav')
59
-
60
-
61
- ## footer
62
- from htbuilder import HtmlElement, div, ul, li, br, hr, a, p, img, styles, classes, fonts
63
- from htbuilder.units import percent, px
64
- from htbuilder.funcs import rgba, rgb
65
-
66
-
67
- def image(src_as_string, **style):
68
- return img(src=src_as_string, style=styles(**style))
69
-
70
-
71
- def link(link, text, **style):
72
- return a(_href=link, _target="_blank", style=styles(**style))(text)
73
-
74
-
75
- def layout(*args):
76
- style = """
77
- <style>
78
- # MainMenu {visibility: hidden;}
79
- footer {visibility: hidden;}
80
- .stApp { bottom: 105px; }
81
- </style>
82
- """
83
-
84
- style_div = styles(
85
- position="fixed",
86
- left=0,
87
- bottom=0,
88
- margin=px(0, 0, 0, 0),
89
- width=percent(100),
90
- color="black",
91
- text_align="center",
92
- height="auto",
93
- opacity=1
94
- )
95
-
96
- style_hr = styles(
97
- display="block",
98
- margin=px(8, 8, "auto", "auto"),
99
- border_style="solid",
100
- border_width=px(0.5)
101
- )
102
-
103
- body = p()
104
- foot = div(
105
- style=style_div
106
- )(
107
- hr(
108
- style=style_hr
109
- ),
110
- body
111
- )
112
- st.markdown(style,unsafe_allow_html=True)
113
-
114
- for arg in args:
115
- if isinstance(arg, str):
116
- body(arg)
117
-
118
- elif isinstance(arg, HtmlElement):
119
- body(arg)
120
-
121
- st.markdown(str(foot), unsafe_allow_html=True)
122
-
123
-
124
- def footer():
125
- myargs = [
126
- "©️ Apps Consultants",
127
- ]
128
- layout(*myargs)
129
-
130
- if __name__ == "__main__":
131
- footer()
 
1
  import streamlit as st
2
  import time
3
  from datetime import datetime
4
+ from transformers import SpeechT5Processor, SpeechT5ForSpeechToSpeech, SpeechT5HifiGan, SpeechT5ForTextToSpeech
5
  import numpy as np
6
  import torch
7
  from io import StringIO
8
  import soundfile as sf
9
 
10
+ # Improved Styling
11
+ def local_css(file_name):
12
+ with open(file_name) as f:
13
+ st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
14
 
15
+ local_css("style.css") # Assuming a CSS file named 'style.css' in the same directory
 
 
 
 
 
16
 
17
+ # Streamlined Layout
18
+ st.title("Text-to-Voice Conversion")
19
+ st.markdown("Convert your text to speech using advanced AI models.")
 
 
 
 
 
20
 
21
+ # Load models outside of function calls for efficiency
22
+ @st.cache(allow_output_mutation=True)
23
+ def load_models():
24
+ model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
25
+ processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
26
+ vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
27
+ return model, processor, vocoder
28
 
29
+ model, processor, vocoder = load_models()
 
 
 
 
 
 
 
 
 
 
30
 
31
+ # Load speaker embeddings
32
+ @st.cache(allow_output_mutation=True)
33
+ def get_speaker_embeddings():
34
+ speaker_embeddings = np.load("cmu_us_slt_arctic-wav-arctic_a0508.npy")
35
+ return torch.tensor(speaker_embeddings).unsqueeze(0)
36
 
37
+ speaker_embeddings = get_speaker_embeddings()
38
+
39
+ # Text Input
40
+ text = st.text_area("Type your text or upload a text file below.")
41
+
42
+ # Function to convert text to speech
43
+ def text_to_speech(text):
 
44
  inputs = processor(text=text, return_tensors="pt")
45
  spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
46
  with torch.no_grad():
47
  speech = vocoder(spectrogram)
48
  sf.write("speech.wav", speech.numpy(), samplerate=16000)
49
+ return "speech.wav"
50
+
51
+ # Convert Button
52
+ if st.button("Convert"):
53
+ if text:
54
+ audio_path = text_to_speech(text)
55
+ audio_file = open(audio_path, 'rb')
56
+ audio_bytes = audio_file.read()
57
+ st.audio(audio_bytes, format='audio/wav')
58
+ else:
59
+ st.error("Please enter some text to convert.")
60
+
61
+ # File Uploader
62
+ uploaded_file = st.file_uploader("Upload your text file here", type=['txt'])
63
+ if uploaded_file is not None:
64
+ text = uploaded_file.getvalue().decode("utf-8")
65
+ audio_path = text_to_speech(text)
66
+ audio_file = open(audio_path, 'rb')
67
  audio_bytes = audio_file.read()
68
  st.audio(audio_bytes, format='audio/wav')