# animatedvideo / app.py
# saima730's picture
# Update app.py
# e0e70f0 verified
# raw
# history blame
# 2.75 kB
import gradio as gr
from gtts import gTTS
from moviepy.editor import TextClip, AudioFileClip
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
import torch
import tempfile
import os
# Load the pretrained RAG components once, when the module is first run.
# All three pieces come from the same checkpoint, so name it in one place.
_RAG_CHECKPOINT = "facebook/rag-sequence-nq"

tokenizer = RagTokenizer.from_pretrained(_RAG_CHECKPOINT)

# NOTE(review): use_dummy_dataset=True presumably selects a small stand-in
# retrieval index instead of the full one -- confirm against the
# transformers RagRetriever documentation.
retriever = RagRetriever.from_pretrained(
    _RAG_CHECKPOINT,
    index_name="exact",
    use_dummy_dataset=True,
)

# Prefer the GPU when torch reports one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the generator around the retriever and move it to the chosen device.
model = RagSequenceForGeneration.from_pretrained(
    _RAG_CHECKPOINT,
    retriever=retriever,
).to(device)
def generate_response(input_text):
    """Generate a text answer for ``input_text`` with the module-level RAG model.

    Args:
        input_text: The text to feed to the tokenizer and model.

    Returns:
        The first decoded sequence produced by ``model.generate``, or the
        literal string ``"Error generating response"`` if any step raises.
    """
    try:
        # Tokenize into PyTorch tensors and move the ids next to the model.
        encoded = tokenizer(input_text, return_tensors="pt")
        question_ids = encoded.input_ids.to(device)

        output_ids = model.generate(question_ids)

        # batch_decode returns one string per generated sequence; only the
        # first one is used.
        decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
        return decoded[0]
    except Exception as err:
        print(f"Error in generate_response: {err}")
        return "Error generating response"
def text_to_speech(text):
    """Synthesize ``text`` to an MP3 file with gTTS and return the file path.

    Args:
        text: The text to speak.

    Returns:
        Path of a newly created temporary ``.mp3`` file (created with
        ``delete=False``, so the caller is responsible for removing it), or
        ``None`` when ``text`` is blank / not a string or synthesis fails.
    """
    # Nothing to speak: skip the gTTS call and do not create a temp file.
    if not isinstance(text, str) or not text.strip():
        return None

    audio_path = None
    try:
        tts = gTTS(text)
        # Reserve a unique path, then close the handle *before* gTTS writes to
        # it: re-opening a NamedTemporaryFile by name while it is still open
        # is not portable (it fails on Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio_file:
            audio_path = temp_audio_file.name
        tts.save(audio_path)
        return audio_path
    except Exception as e:
        print(f"Error in text_to_speech: {e}")
        # The temp file was created with delete=False; remove it so a failed
        # synthesis does not leave an empty/partial file behind.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return None
def text_to_video(text, audio_filename):
    """Render ``text`` as a still caption video with ``audio_filename`` as audio.

    Args:
        text: Caption drawn in white on a black 640x480 frame.
        audio_filename: Path of the audio file to attach to the video.

    Returns:
        Path of a newly created temporary ``.mp4`` file (created with
        ``delete=False``, so the caller is responsible for removing it), or
        ``None`` when the audio file is missing or rendering fails.
    """
    # Without a readable audio file there is nothing to build; fail fast
    # before any clip or temp file is created.
    if not audio_filename or not os.path.isfile(audio_filename):
        print(f"Error in text_to_video: audio file not found: {audio_filename}")
        return None

    audio_clip = None
    video_path = None
    try:
        audio_clip = AudioFileClip(audio_filename)
        # Follow the narration length instead of a fixed 10 seconds so the
        # audio is neither cut off nor followed by dead air; keep 10 s only
        # as a fallback when the duration is unknown.
        duration = audio_clip.duration or 10

        text_clip = TextClip(text, fontsize=50, color='white', bg_color='black', size=(640, 480))
        video_clip = text_clip.set_duration(duration).set_audio(audio_clip)

        # Reserve a unique path and close the handle before the encoder
        # writes to it (re-opening an open temp file by name is not portable).
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video_file:
            video_path = temp_video_file.name

        # An image-based clip carries no frame rate of its own, so
        # write_videofile needs an explicit fps or it raises.
        video_clip.write_videofile(video_path, codec='libx264', fps=24)
        return video_path
    except Exception as e:
        print(f"Error in text_to_video: {e}")
        # Remove the delete=False temp file so a failed render leaves nothing behind.
        if video_path is not None:
            try:
                os.remove(video_path)
            except OSError:
                pass
        return None
    finally:
        # Release the audio reader whether or not rendering succeeded.
        if audio_clip is not None:
            audio_clip.close()
def process_text(input_text):
    """Run the full pipeline: generated answer -> speech -> caption video.

    Args:
        input_text: The user's text from the UI.

    Returns:
        A 3-tuple ``(response, audio_path, video_path)``. ``audio_path`` and
        ``video_path`` are file paths on success and ``None`` when that stage
        failed or was skipped. Blank input yields ``("", None, None)``; an
        unexpected exception yields ``("Error processing text", None, None)``.
    """
    # Blank input (e.g. an empty text box): do not run the model, the
    # text-to-speech call or the video encoder at all.
    if not isinstance(input_text, str) or not input_text.strip():
        return "", None, None

    try:
        response = generate_response(input_text)

        audio_file = text_to_speech(response)
        if not audio_file:
            # The audio/video slots feed file-based output components, so a
            # failure is signalled with None rather than a message string
            # (a string there would be interpreted as a file path).
            print("Error generating audio")
            return response, None, None

        video_file = text_to_video(response, audio_file)
        if not video_file:
            print("Error generating video")
            return response, audio_file, None

        return response, audio_file, video_file
    except Exception as e:
        print(f"Error in process_text: {e}")
        return "Error processing text", None, None
# Wire the pipeline into a Gradio UI: one text input and three outputs
# (answer text, synthesized audio, rendered video), in the same order as the
# tuple returned by process_text.
_prompt_box = gr.Textbox(label="Enter your text:")
_result_widgets = [
    gr.Textbox(label="RAG Model Response"),
    gr.Audio(label="Audio"),
    gr.Video(label="Video"),
]

iface = gr.Interface(
    fn=process_text,
    inputs=_prompt_box,
    outputs=_result_widgets,
    # live=True asks Gradio to re-run process_text when the input changes
    # instead of waiting for an explicit submit.
    live=True,
)

# Start the web server as soon as this module is executed.
iface.launch()