nikajoon committed on
Commit
294d87d
1 Parent(s): 0e07c1c

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import wave
3
+ import numpy as np
4
+ from io import BytesIO
5
+ from huggingface_hub import hf_hub_download
6
+ from piper import PiperVoice
7
+ from transformers import pipeline
8
+ import hazm
9
+ import typing
10
+
11
# hazm helpers shared by the text-preprocessing functions below.
normalizer = hazm.Normalizer()
sent_tokenizer = hazm.SentenceTokenizer()
word_tokenizer = hazm.WordTokenizer()

# POS tagger: download the model file, then load it into hazm.
tagger_path = hf_hub_download(
    repo_id="gyroing/HAZM_POS_TAGGER",
    filename="pos_tagger.model",
)
tagger = hazm.POSTagger(model=tagger_path)

# Piper voice: the ONNX model and its JSON config come from the same repo.
model_path = hf_hub_download(
    repo_id="gyroing/Persian-Piper-Model-gyro",
    filename="fa_IR-gyro-medium.onnx",
)
config_path = hf_hub_download(
    repo_id="gyroing/Persian-Piper-Model-gyro",
    filename="fa_IR-gyro-medium.onnx.json",
)
voice = PiperVoice.load(model_path, config_path)
20
+
21
def preprocess_text(text: str) -> str:
    """Normalize *text* with hazm and return it as one re-joined string.

    The text is normalized, split into sentences, and each sentence is
    word-tokenized and passed through ``fix_words``. The words of a sentence
    are joined with a single space, and the sentences are then joined with a
    single space as well.

    Args:
        text: Raw input text.

    Returns:
        A single string containing every processed sentence (not a nested
        list of words, despite what the previous annotation claimed).
    """
    normalized = normalizer.normalize(text)
    return " ".join(
        " ".join(fix_words(word_tokenizer.tokenize(sentence)))
        for sentence in sent_tokenizer.tokenize(normalized)
    )
31
def fix_words(words: typing.List[str]) -> typing.List[str]:
    """Append an explicit "ِ" to words whose POS tag ends in "Z".

    Each word is tagged with the module-level ``tagger``. When the tag ends
    with "Z" (presumably the tagger's ezafe marker -- confirm against the
    tagger model) and the word does not already end with "ِ", a "ِ" is
    appended. If the word ends with "ه" that is not preceded by "ا", the
    suffix "‌ی" is inserted before the "ِ".

    Args:
        words: Tokens of a single sentence.

    Returns:
        A new list with the same number of words, modified where applicable.
    """
    fixed_words = []

    for word, pos in tagger.tag(words):
        # ``endswith`` is used instead of indexing so that an empty tag, an
        # empty word, or a one-character word cannot raise IndexError.
        if pos.endswith("Z") and word and not word.endswith("ِ"):
            if word.endswith("ه") and not word.endswith("اه"):
                word += "‌ی"
            word += "ِ"

        fixed_words.append(word)

    return fixed_words
45
+
46
def synthesize_speech(text: str) -> bytes:
    """Synthesize *text* with the loaded Piper voice and return WAV bytes.

    The text is first run through ``preprocess_text``; the audio is then
    written as a mono, 16-bit WAV file at the voice's configured sample rate
    into an in-memory buffer.

    Args:
        text: Input text to speak.

    Returns:
        The complete WAV file (header included) as ``bytes``.
    """
    buffer = BytesIO()
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)  # 16-bit
        wav_file.setnchannels(1)  # mono

        # Preprocess, then let Piper write the frames into the WAV writer.
        voice.synthesize(preprocess_text(text), wav_file)

    # Read only after the writer is closed so the WAV header is finalized.
    # The bytes are returned as-is: the previous int16 NumPy round trip
    # (frombuffer -> tobytes) produced exactly the same bytes.
    return buffer.getvalue()
65
+
66
# Build the UI with Gradio Blocks: a text input, an audio output and a button.
with gr.Blocks(theme=gr.themes.Base()) as blocks:
    input_text = gr.Textbox(label="Input")
    output_audio = gr.Audio(label="Output", type="numpy")
    submit_button = gr.Button("Synthesize")

    # Clicking the button runs synthesis on the textbox content.
    submit_button.click(
        fn=synthesize_speech,
        inputs=input_text,
        outputs=[output_audio],
    )

# Start the app.
blocks.launch()