Spaces:

nikajoon
/

PTTS

Sleeping

App Files Files Community

nikajoon committed on Aug 17

Commit

294d87d

•

1 Parent(s): 0e07c1c

Upload app.py

Browse files

Files changed (1) hide show

app.py +74 -0

app.py ADDED Viewed

	@@ -0,0 +1,74 @@

+import gradio as gr
+import wave
+import numpy as np
+from io import BytesIO
+from huggingface_hub import hf_hub_download
+from piper import PiperVoice
+from transformers import pipeline
+import hazm
+import typing
+# hazm text-processing helpers shared by preprocess_text() below.
+normalizer = hazm.Normalizer()
+sent_tokenizer = hazm.SentenceTokenizer()
+word_tokenizer = hazm.WordTokenizer()
+# Fetch the POS tagger model file from the Hugging Face Hub; fix_words() uses
+# the resulting tagger to decide which words get an extra suffix.
+tagger_path = hf_hub_download(repo_id="gyroing/HAZM_POS_TAGGER",  filename="pos_tagger.model")
+tagger = hazm.POSTagger(model=tagger_path)
+# Fetch the Piper ONNX model and its JSON config from the Hub, then load the
+# voice once at import time so every request reuses the same instance.
+model_path = hf_hub_download(repo_id="gyroing/Persian-Piper-Model-gyro", filename="fa_IR-gyro-medium.onnx")
+config_path = hf_hub_download(repo_id="gyroing/Persian-Piper-Model-gyro", filename="fa_IR-gyro-medium.onnx.json")
+voice = PiperVoice.load(model_path, config_path)
+def preprocess_text(text: str) -> typing.List[typing.List[str]]:
+        """Split/normalize text into sentences/words with hazm"""
+        text = normalizer.normalize(text)
+        processed_sentences = []
+        for sentence in sent_tokenizer.tokenize(text):
+            words = word_tokenizer.tokenize(sentence)
+            processed_words = fix_words(words)
+            processed_sentences.append(" ".join(processed_words))
+        return  " ".join(processed_sentences)
+def fix_words(words: typing.List[str]) -> typing.List[str]:
+        fixed_words = []
+        for word, pos in tagger.tag(words):
+            if pos[-1] == "Z":
+                if word[-1] != "ِ":
+                    if (word[-1] == "ه") and (word[-2] != "ا"):
+                        word += "‌ی"
+                word += "ِ"
+            fixed_words.append(word)
+        return fixed_words
+def synthesize_speech(text):
+    # Create an in-memory buffer for the WAV file
+    buffer = BytesIO()
+    with wave.open(buffer, 'wb') as wav_file:
+        wav_file.setframerate(voice.config.sample_rate)
+        wav_file.setsampwidth(2)  # 16-bit
+        wav_file.setnchannels(1)  # mono
+        # Synthesize speech
+        eztext = preprocess_text(text)
+        voice.synthesize(eztext, wav_file)
+    # Convert buffer to NumPy array for Gradio output
+    buffer.seek(0)
+    audio_data = np.frombuffer(buffer.read(), dtype=np.int16)
+    return audio_data.tobytes()
+# Using Gradio Blocks: one text input, one audio output, one trigger button.
+with gr.Blocks(theme=gr.themes.Base()) as blocks:
+    # Text the user wants spoken.
+    input_text = gr.Textbox(label="Input")
+    # NOTE(review): synthesize_speech returns bytes rather than a NumPy array;
+    # confirm that type="numpy" is intended for this output component.
+    output_audio = gr.Audio(label="Output", type="numpy")
+    submit_button = gr.Button("Synthesize")
+    # Clicking the button passes the textbox value to synthesize_speech and
+    # routes its return value to the audio component.
+    submit_button.click(synthesize_speech, inputs=input_text, outputs=[output_audio])
+# Run the app (executed at module level, i.e. whenever this file is run or imported)
+blocks.launch()