EmadSalem Yusin committed on
Commit
c45d4e0
β€’
0 Parent(s):

Duplicate from Yusin/Speech2ChatGPT2Speech

Browse files

Co-authored-by: Chen, Yusin <[email protected]>

Files changed (6) hide show
  1. .gitattributes +27 -0
  2. .gitignore +1 -0
  3. README.md +41 -0
  4. app.py +106 -0
  5. packages.txt +2 -0
  6. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.onnx filter=lfs diff=lfs merge=lfs -text
13
+ *.ot filter=lfs diff=lfs merge=lfs -text
14
+ *.parquet filter=lfs diff=lfs merge=lfs -text
15
+ *.pb filter=lfs diff=lfs merge=lfs -text
16
+ *.pt filter=lfs diff=lfs merge=lfs -text
17
+ *.pth filter=lfs diff=lfs merge=lfs -text
18
+ *.rar filter=lfs diff=lfs merge=lfs -text
19
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
21
+ *.tflite filter=lfs diff=lfs merge=lfs -text
22
+ *.tgz filter=lfs diff=lfs merge=lfs -text
23
+ *.wasm filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
README.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Speech2ChatGPT2Speech
3
+ emoji: πŸ—£οΈπŸ™‰
4
+ colorFrom: green
5
+ colorTo: green
6
+ sdk: gradio
7
+ python_version: 3.9
8
+ sdk_version: 3.2
9
+ app_file: app.py
10
+ models:
11
+ - neongeckocom/tts-vits-ljspeech-en
12
+ - neongeckocom/tts-vits-css10-es
13
+ - neongeckocom/tts-vits-css10-fr
14
+ - neongeckocom/tts-vits-css10-de
15
+ - neongeckocom/tts-vits-cv-it
16
+ - neongeckocom/tts-vits-mai-pl
17
+ - neongeckocom/tts-vits-mai-uk
18
+ - neongeckocom/tts-vits-cv-ro
19
+ - neongeckocom/tts-vits-css10-hu
20
+ - neongeckocom/tts-vits-cv-el
21
+ - neongeckocom/tts-vits-cv-cs
22
+ - neongeckocom/tts-vits-cv-sv
23
+ - neongeckocom/tts-vits-cv-pt
24
+ - neongeckocom/tts-vits-cv-bg
25
+ - neongeckocom/tts-vits-cv-hr
26
+ - neongeckocom/tts-vits-cv-da
27
+ - neongeckocom/tts-vits-cv-sk
28
+ - neongeckocom/tts-vits-css10-nl
29
+ - neongeckocom/tts-vits-css10-fi
30
+ - neongeckocom/tts-vits-cv-lt
31
+ - neongeckocom/tts-vits-cv-sl
32
+ - neongeckocom/tts-vits-cv-lv
33
+ - neongeckocom/tts-vits-cv-et
34
+ - neongeckocom/tts-vits-cv-ga
35
+ - neongeckocom/tts-vits-cv-mt
36
+ pinned: false
37
+ license: bsd-3-clause
38
+ duplicated_from: Yusin/Speech2ChatGPT2Speech
39
+ ---
40
+
41
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# --- Standard library ---
import os
import tempfile

# --- Third-party ---
import gradio as gr
import whisper
from neon_tts_plugin_coqui import CoquiTTS
from pyChatGPT import ChatGPT

# Languages supported by the Coqui TTS plugin, and the UI default.
LANGUAGES = list(CoquiTTS.langs.keys())
default_lang = "en"

# Speech-to-text: load the Whisper "small" checkpoint once at startup.
whisper_model = whisper.load_model("small")

# ChatGPT session token, supplied through the environment
# (set as a Space secret named "SessionToken").
session_token = os.environ.get('SessionToken')

title = "Speech to ChatGPT to Speech"

# Text-to-speech engine instance, shared by all requests.
coquiTTS = CoquiTTS()
# ChatGPT
def chat_hf(audio, custom_token, language):
    """Transcribe *audio* with Whisper, send the text to ChatGPT, and
    synthesize the reply with Coqui TTS.

    Parameters:
        audio: filepath of the recorded audio (Gradio microphone input).
        custom_token: user-supplied ChatGPT session token, used as a
            fallback when the shared ``session_token`` fails.
        language: TTS language code selected in the UI.

    Returns:
        (whisper_text, gpt_response, wav_path) — the transcript, the
        ChatGPT reply, and the path of the synthesized WAV file.
    """
    # Transcribe once up front; the original re-ran Whisper in the
    # fallback path for no benefit.
    whisper_text = translate(audio)

    try:
        gpt_response = _ask_chatgpt(session_token, whisper_text)
    except Exception:
        # The shared session token may be missing/expired; retry with the
        # token the user pasted into the UI instead of failing outright.
        gpt_response = _ask_chatgpt(custom_token, whisper_text)

    # to voice: delete=False so Gradio can still read the file after the
    # handler returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})

    return whisper_text, gpt_response, fp.name


def _ask_chatgpt(token, message):
    """Send *message* to ChatGPT authenticated by *token*; return the reply text."""
    api = ChatGPT(token)
    resp = api.send_message(message)
    api.refresh_auth()        # refresh the authorization token
    api.reset_conversation()  # reset the conversation
    return resp['message']
# whisper
def translate(audio):
    """Transcribe the audio file at path *audio* with Whisper.

    Loads the audio, pads/trims it to Whisper's 30-second window, decodes
    it (language is auto-detected), prints the detected language and the
    transcript for debugging, and returns the transcript text.
    """
    print("""
    —
    Sending audio to Whisper ...
    —
    """)

    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)

    # fp16=False: decode in fp32 so this also runs on CPU-only Spaces.
    transcript_options = whisper.DecodingOptions(task="transcribe", fp16 = False)

    # NOTE: whisper.decode() auto-detects the language when DecodingOptions
    # has no language set, so the original's separate detect_language()
    # pass (whose result was discarded) was redundant and is removed.
    transcription = whisper.decode(whisper_model, mel, transcript_options)

    print("language spoken: " + transcription.language)
    print("transcript: " + transcription.text)
    print("———————————————————————————————————————————————")

    return transcription.text
# UI: left column holds the inputs (language, mic, fallback token, submit);
# right column shows the transcript, the ChatGPT reply, and the TTS audio.
with gr.Blocks() as blocks:
    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
                + title
                + "</h1>")
    with gr.Row(equal_height=True):
        with gr.Column():
            radio = gr.Radio(
                label="Language",
                choices=LANGUAGES,
                value=default_lang
            )
            # gr.Audio directly — gr.inputs.Audio is the deprecated
            # pre-3.x alias and emits warnings under gradio 3.x.
            audio_file = gr.Audio(source="microphone", type="filepath")
            custom_token = gr.Textbox(label='If it fails, use your own session token', placeholder="your own session token")
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")
        with gr.Column():
            text1 = gr.Textbox(label="Speech to Text")
            text2 = gr.Textbox(label="chatGPT response")
            audio = gr.Audio(label="Output", interactive=False)

    # actions
    submit.click(
        chat_hf,
        [audio_file, custom_token, radio],
        [text1, text2, audio],
    )
    # Changing the language preloads that language's sample sentence
    # into the response box.
    radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, text2)


blocks.launch(debug=True)
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ libsndfile1
2
+ espeak-ng
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ neon-tts-plugin-coqui==0.6.0
2
+ pyChatGPT
3
+ git+https://github.com/openai/whisper.git