killerz3 committed on
Commit
1c3d65b
1 Parent(s): 6ca9466

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -0
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import asyncio
4
+ from moviepy.editor import AudioFileClip, concatenate_audioclips
5
+ from huggingface_hub import InferenceClient
6
+ import torch
7
+ import edge_tts
8
+ import tempfile
9
+ import gradio as gr
10
+
11
# Initialize Hugging Face Inference Client
# Module-level client for the Mistral-7B-Instruct endpoint; used by generate_podcast.
Client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
# Seeded torch RNG. NOTE(review): not referenced anywhere in this file — confirm it is needed.
generator = torch.Generator().manual_seed(42)
14
+
15
async def text_to_speech(text, voice, filename):
    """Synthesize *text* with the given edge-tts *voice* and save it to *filename*."""
    await edge_tts.Communicate(text, voice).save(filename)
18
+
19
async def generate_conversation(script):
    """Synthesize every line of a podcast script and stitch the audio together.

    Parameters
    ----------
    script : dict
        Expected shape: {"title": str, "content": {"<Speaker>_<idx>": str, ...}}.
        Speaker "Alice" is voiced with en-US-JennyNeural; any other speaker
        with en-US-GuyNeural.

    Returns
    -------
    str
        Path of a temporary MP3 file holding the concatenated dialogue.
        The caller is responsible for deleting it.
    """
    content = script['content']

    temp_files = []
    tasks = []
    for key, text in content.items():
        speaker = key.split('_')[0]  # Extract the speaker name
        index = key.split('_')[1]    # Extract the dialogue index
        voice = "en-US-JennyNeural" if speaker == "Alice" else "en-US-GuyNeural"

        # Create a named temp file for this line and close the handle right
        # away — edge-tts writes by path, and an open handle leaks a descriptor.
        temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
        temp_file.close()
        temp_files.append(temp_file.name)

        tasks.append(text_to_speech(text, voice, temp_file.name))
        # BUGFIX: the placeholder "(unknown)" is restored to the actual filename.
        print(f"Generated audio for {speaker}_{index}: {temp_file.name}")

    # Run all TTS requests concurrently.
    await asyncio.gather(*tasks)

    # Combine the audio files using moviepy
    audio_clips = [AudioFileClip(temp_file) for temp_file in temp_files]
    combined = concatenate_audioclips(audio_clips)

    # Create temporary file for the combined output
    temp_output_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
    temp_output_file.close()
    output_filename = temp_output_file.name

    # Save the combined file
    combined.write_audiofile(output_filename)
    print(f"Combined audio saved as: {output_filename}")

    # Release moviepy's readers BEFORE deleting the per-line source files,
    # otherwise the clips keep open handles on files being removed.
    for clip in audio_clips:
        clip.close()
    combined.close()

    # Clean up temporary files
    for temp_file in temp_files:
        os.remove(temp_file)
        print(f"Deleted temporary file: {temp_file}")

    return output_filename
59
+
60
def generate_podcast(topic, seed):
    """Generate a two-host podcast about *topic* and return the audio file path.

    Parameters
    ----------
    topic : str
        Subject of the podcast, appended to the LLM prompt.
    seed : int
        Sampling seed forwarded to the text-generation endpoint.

    Returns
    -------
    str
        Path of the combined MP3 file. BUGFIX: the previous version read the
        file into bytes and deleted it, but the UI output is
        gr.Audio(type="filepath"), which requires a path to a file that still
        exists on disk; Gradio serves and cleans it up itself.
    """
    system_instructions = '''[SYSTEM] You are an educational podcast generator. You have to create a podcast between Alice and Bob that gives an overview of the topic given by the user.
Please provide the script in the following JSON format:
{
"title": "[string]",
"content": {
"Alice_0": "[string]",
"BOB_0": "[string]",
...
}
}
Be concise.
'''

    text = f" Topic: {topic}"
    formatted_prompt = system_instructions + text
    stream = Client.text_generation(formatted_prompt, max_new_tokens=1024, seed=seed, stream=True, details=True, return_full_text=False)

    # Accumulate streamed tokens, skipping the end-of-sequence marker.
    generated_script = ""
    for response in stream:
        if response.token.text != "</s>":
            generated_script += response.token.text

    # The model is instructed to emit pure JSON but may wrap it in prose or
    # code fences — keep only the outermost {...} span before parsing.
    start = generated_script.find('{')
    end = generated_script.rfind('}')
    if start == -1 or end == -1:
        raise ValueError(f"Model did not return a JSON script: {generated_script!r}")
    script_json = json.loads(generated_script[start:end + 1])

    # Generate the podcast audio from the parsed script.
    output_filename = asyncio.run(generate_conversation(script_json))
    print("Output File:" + output_filename)

    return output_filename
98
+
99
+ DESCRIPTION = """ # <center><b>PODGEN 📻</b></center>
100
+ ### <center>Generate a podcast on any topic</center>
101
+ ### <center>Use the Power of llms to understand any topic better</center>
102
+ """
103
+
104
# Build the Gradio UI: a hidden seed slider, a topic box, and an audio player
# wired to generate_podcast through a gr.Interface embedded in the Blocks.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        # Hidden seed control, fixed at 0 so generations are reproducible.
        seed = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=999999,
            step=1,
            value=0,
            visible=False,
        )
    # Renamed from `input`/`output` to stop shadowing the builtins.
    topic_box = gr.Textbox(label="Topic", placeholder="Enter a topic")
    podcast_audio = gr.Audio(label="Podgen", type="filepath",
                             interactive=False,
                             autoplay=True,
                             elem_classes="audio")
    # BUGFIX: dropped batch=True / max_batch_size=10 — generate_podcast takes
    # a single (topic, seed) pair, not lists, so batched calls broke requests.
    # NOTE(review): live=True re-runs the LLM+TTS pipeline on every input
    # change, which is expensive — confirm this is intended.
    gr.Interface(
        fn=generate_podcast,
        inputs=[topic_box, seed],
        outputs=[podcast_audio],
        live=True)


if __name__ == "__main__":
    demo.queue(max_size=200).launch()