Musa committed on
Commit
ba13912
1 Parent(s): 21689ef

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -0
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastspeech2 import FastSpeech2
2
+ voice_conversion_model = FastSpeech2.from_pretrained("path/to/pretrained/voice_conversion_model")
3
def convert_voice(text):
    """Run the module-level voice-conversion model on ``text``.

    Args:
        text: Input handed unchanged to ``voice_conversion_model``.

    Returns:
        Whatever ``voice_conversion_model(text)`` returns, unmodified.
    """
    return voice_conversion_model(text)
6
+
7
def transcribe(microphone, state, task="transcribe"):
    """Transcribe a recording, extend the transcript, and convert the voice.

    Args:
        microphone: Audio input passed directly to ``pipe``.
        state: Transcript text accumulated by earlier calls.
        task: ``"transcribe"`` selects ``transcribe_token_id``; any other
            value selects ``translate_token_id``.

    Returns:
        A 3-tuple ``(transcript, transcript, converted_voice)``: the updated
        transcript for display, the same transcript to carry forward as
        state, and the result of ``convert_voice`` on the newly recognised
        text. One value per declared interface output, in that order.
    """
    # Select the task token the decoder is forced to emit for this call.
    task_token_id = transcribe_token_id if task == "transcribe" else translate_token_id
    pipe.model.config.forced_decoder_ids = [[2, task_token_id]]

    text = pipe(microphone)["text"]
    converted_voice = convert_voice(text)

    # The transcript accumulates recognised text only; the converted voice is
    # returned separately so it is never concatenated into a string.
    transcript = state + "\n" + text
    return transcript, transcript, converted_voice
16
+
17
# Live microphone interface. The three outputs correspond, in order, to the
# values returned by `transcribe`: transcript text, carried-over state, and
# the converted voice.
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath", optional=True),
        gr.State(value=""),
    ],
    outputs=[
        gr.Textbox(lines=15),
        gr.State(),
        # Output audio for the converted voice. No `type` is passed: "auto"
        # is not a supported value for this component.
        gr.Audio(),
    ],
    layout="horizontal",
    theme="huggingface",
    title="Whisper Large V2: Transcribe Audio and Voice Conversion",
    live=True,
    description=(
        "Transcribe long-form microphone or audio inputs and convert the voice with the click of a button! Demo uses the"
        f" checkpoint ~[{MODEL_NAME}](https://huggingface.co/{MODEL_NAME})~ and 🤗 Transformers to transcribe audio files"
        " of arbitrary length and FastSpeech2 for voice conversion."
    ),
    allow_flagging="never",
)