Spaces: Nag189/Text-to-Voice

Runtime error

App Files Community

Nag189 committed on Nov 17, 2023

Commit

00b0d1a

•

1 Parent(s): 4300cef

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -110

app.py CHANGED Viewed

@@ -1,131 +1,68 @@
 import streamlit as st
 import time
 from datetime import datetime
-from transformers import SpeechT5Processor, SpeechT5ForSpeechToSpeech, SpeechT5HifiGan,SpeechT5ForTextToSpeech
 import numpy as np
 import torch
 from io import StringIO
 import soundfile as sf
-html_temp= """
-    <div style="background-color:tomato;padding:10px">
-    <h2 style="color:white;text-align:centre;"> Text-to-Speech </h2>
-    </div>
-    """
-st.markdown(html_temp,unsafe_allow_html=True)
-st.markdown(
-    """
-     Text to Audio Conversion.
-"""
-)
-model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
-processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
-vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
-speaker_embeddings = np.load("cmu_us_slt_arctic-wav-arctic_a0508.npy")
-speaker_embeddings = torch.tensor(speaker_embeddings).unsqueeze(0)
-text = st.text_area("Type your text..")
-st.button("Convert")
-inputs = processor(text=text, return_tensors="pt")
-spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
-with torch.no_grad():
-    speech = vocoder(spectrogram)
-    sf.write("speech.wav", speech.numpy(), samplerate=16000)
-audio_file = open('speech.wav', 'rb')
-audio_bytes = audio_file.read()
-st.audio(audio_bytes, format='audio/wav')
-uploaded_file=st.file_uploader("Upload your text file here",type=['txt'] )
-if uploaded_file is not None:
-    stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
-    #To read file as string:
-    text = stringio.read()
-    st.write(text)
-    st.button("Convert",key=1)
     inputs = processor(text=text, return_tensors="pt")
     spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
     with torch.no_grad():
         speech = vocoder(spectrogram)
         sf.write("speech.wav", speech.numpy(), samplerate=16000)
-    audio_file = open('speech.wav', 'rb')
     audio_bytes = audio_file.read()
     st.audio(audio_bytes, format='audio/wav')
-## footer
-from htbuilder import HtmlElement, div, ul, li, br, hr, a, p, img, styles, classes, fonts
-from htbuilder.units import percent, px
-from htbuilder.funcs import rgba, rgb
-def image(src_as_string, **style):
-    return img(src=src_as_string, style=styles(**style))
-def link(link, text, **style):
-    return a(_href=link, _target="_blank", style=styles(**style))(text)
-def layout(*args):
-    style = """
-    <style>
-      # MainMenu {visibility: hidden;}
-      footer {visibility: hidden;}
-     .stApp { bottom: 105px; }
-    </style>
-    """
-    style_div = styles(
-        position="fixed",
-        left=0,
-        bottom=0,
-        margin=px(0, 0, 0, 0),
-        width=percent(100),
-        color="black",
-        text_align="center",
-        height="auto",
-        opacity=1
-    )
-    style_hr = styles(
-        display="block",
-        margin=px(8, 8, "auto", "auto"),
-        border_style="solid",
-        border_width=px(0.5)
-    )
-    body = p()
-    foot = div(
-        style=style_div
-    )(
-        hr(
-            style=style_hr
-        ),
-        body
-    )
-    st.markdown(style,unsafe_allow_html=True)
-    for arg in args:
-        if isinstance(arg, str):
-            body(arg)
-        elif isinstance(arg, HtmlElement):
-            body(arg)
-    st.markdown(str(foot), unsafe_allow_html=True)
-def footer():
-    myargs = [
-        "©️ Apps Consultants",
-    ]
-    layout(*myargs)
-if __name__ == "__main__":
-    footer()

 import streamlit as st
 import time
 from datetime import datetime
+from transformers import SpeechT5Processor, SpeechT5ForSpeechToSpeech, SpeechT5HifiGan, SpeechT5ForTextToSpeech
 import numpy as np
 import torch
 from io import StringIO
 import soundfile as sf
+# Improved Styling
+def local_css(file_name):
+    with open(file_name) as f:
+        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
+local_css("style.css")  # Assuming a CSS file named 'style.css' in the same directory
+# Streamlined Layout
+st.title("Text-to-Voice Conversion")
+st.markdown("Convert your text to speech using advanced AI models.")
+# Load models outside of function calls for efficiency
+@st.cache(allow_output_mutation=True)
+def load_models():
+    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
+    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+    return model, processor, vocoder
+model, processor, vocoder = load_models()
+# Load speaker embeddings
+@st.cache(allow_output_mutation=True)
+def get_speaker_embeddings():
+    speaker_embeddings = np.load("cmu_us_slt_arctic-wav-arctic_a0508.npy")
+    return torch.tensor(speaker_embeddings).unsqueeze(0)
+speaker_embeddings = get_speaker_embeddings()
+# Text Input
+text = st.text_area("Type your text or upload a text file below.")
+# Function to convert text to speech
+def text_to_speech(text):
     inputs = processor(text=text, return_tensors="pt")
     spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
     with torch.no_grad():
         speech = vocoder(spectrogram)
         sf.write("speech.wav", speech.numpy(), samplerate=16000)
+        return "speech.wav"
+# Convert Button
+if st.button("Convert"):
+    if text:
+        audio_path = text_to_speech(text)
+        audio_file = open(audio_path, 'rb')
+        audio_bytes = audio_file.read()
+        st.audio(audio_bytes, format='audio/wav')
+    else:
+        st.error("Please enter some text to convert.")
+# File Uploader
+uploaded_file = st.file_uploader("Upload your text file here", type=['txt'])
+if uploaded_file is not None:
+    text = uploaded_file.getvalue().decode("utf-8")
+    audio_path = text_to_speech(text)
+    audio_file = open(audio_path, 'rb')
     audio_bytes = audio_file.read()
     st.audio(audio_bytes, format='audio/wav')