Spaces:

mr-dee
/

prompt-to-video

Runtime error

App Files Files Community

Dy committed on Jul 16, 2023

Commit

2ac6b63

•

1 Parent(s): f18bf59

Create app.py

Browse files

Files changed (1) hide show

app.py +314 -0

app.py ADDED Viewed

	@@ -0,0 +1,314 @@

+import os
+from bs4 import BeautifulSoup
+import gradio as gr
+from langchain import OpenAI, ConversationChain, LLMChain, PromptTemplate
+from langchain.memory import ConversationBufferWindowMemory
+import openai
+import requests
+from langchain.chat_models import ChatOpenAI
+import ast
+import imgkit
+import pdfkit
+import imgkit
+import re
+import glob
+import openai
+OPENAI_API_KEY="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"
+dict_list_format = '''[{'header': 'slide1_title',
+  'paragraphs': ['bullet_point1',
+   'bullet_point2',
+   'bullet_point3',]},
+'header': 'slide2_title',
+  'paragraphs': ['bullet_point1',
+   'bullet_point2',
+   'bullet_point3',
+...]},
+'header': 'slide3_title',
+  'paragraphs': ['bullet_point1',
+   'bullet_point2',
+   'bullet_point3',
+...]},
+'header': 'slide4_title',
+  'paragraphs': ['bullet_point1',
+   'bullet_point2',
+   'bullet_point3',
+...]},
+'header': 'slide5_title',
+  'paragraphs': ['bullet_point1',
+   'bullet_point2',
+   'bullet_point3',
+...]}]
+'''
+import google.cloud.texttospeech as tts
+from google.oauth2 import service_account
+# Google service-account credentials, loaded once at import time from the relative path
+# "tts_google.json"; text_to_wav passes this object to the Text-to-Speech client.
+credentials = service_account.Credentials.from_service_account_file("tts_google.json")
+def text_to_wav(voice_name: str, text: str, file_name: str):
+    language_code = "-".join(voice_name.split("-")[:2])
+    text_input = tts.SynthesisInput(text=text)
+    voice_params = tts.VoiceSelectionParams(
+        language_code=language_code, name=voice_name
+    )
+    audio_config = tts.AudioConfig(audio_encoding=tts.AudioEncoding.LINEAR16)
+    client = tts.TextToSpeechClient(credentials=credentials)
+    response = client.synthesize_speech(
+        input=text_input,
+        voice=voice_params,
+        audio_config=audio_config,
+    )
+    filename = f"{file_name}"
+    with open(filename, "wb") as out:
+        out.write(response.audio_content)
+        print(f'Generated speech saved to "{filename}"')
+def prompt_to_video(video_prompt):
+    template = '''
+    {history}
+    {human_input}
+    '''
+    prompt = PromptTemplate(
+        input_variables=["history", "human_input"],
+        template=template
+    )
+    chatgpt_chain = LLMChain(
+        llm=ChatOpenAI(model="gpt-4", temperature=0.5,openai_api_key=OPENAI_API_KEY),
+        prompt=prompt,
+        verbose=True,
+        memory=ConversationBufferWindowMemory(k=10),
+    )
+    prompt_input1 = f'''
+    You are a world expert oracle that knows everything.
+    You are also an excellent teacher that explains everything succintly and simply like towards a kid.
+    You are also an expert slide maker and think everything step by step.
+    You are tasked to create 5 slides today.
+    Here is the topic:
+    {video_prompt}
+    Here is the output python list format:
+    {dict_list_format}
+    The slides should be created in a python list format.
+    The list consists of python dictionary objects in the list.
+    Each dictionary object contains the header and paragraphs as keys.
+    Do not name the slide as "Slide 1" or any number.  Insert header as header string.
+    The header is the title of the slide and the paragraph should be a list of string object.
+    Return the output in a python list format.
+    Make sure there is only 5 objects in the python list.
+    Do not declare a new variable, output the python list object only.
+    Do not say "Here's your".  Directly output the python list object only.
+    Make sure there is nothing before or after the python list object.  ONLY output the python list object.
+    '''
+    slide_str_list = []
+    while len(slide_str_list) != 5:
+        slide_dict=chatgpt_chain.predict(human_input=prompt_input1)
+        try:
+            slide_str_list = ast.literal_eval(slide_dict)
+        except:
+            print("Already formatted.")
+    print("this is the slides:", slide_str_list)
+    print("length is:", len(slide_str_list))
+    html_out_list = []
+    for i in slide_str_list:
+        template = '''
+        {history}
+        {human_input}
+        '''
+        prompt = PromptTemplate(
+            input_variables=["history", "human_input"],
+            template=template
+        )
+        chatgpt_chain = LLMChain(
+            llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0.5,openai_api_key=OPENAI_API_KEY),
+            prompt=prompt,
+            verbose=True,
+            memory=ConversationBufferWindowMemory(k=10),
+        )
+        prompt_input2 = f'''
+        You are a world expert oracle that knows everything.
+        You are also an excellent teacher that explains everything succintly and simply like towards a kid.
+        You are also an expert slide maker and thinks about everything step by step.
+        You are tasked to convert a python dictionary into a formatted HTML code.
+        The dictionary object consist of the header and paragraph key.
+        The paragraph key is a list of strings.
+        Here is the dictionary object:
+        {i}
+        The slide should be created in a HTML format with the correct format of 16:9 aspect ratio.
+        The wording of the slides should be formatted appropriately with the header and paragraph.
+        The paragraph in the slides should be formatted in bullet points and each bullet point should be 1.5 line spacing apart.
+        Header and paragraph should be aligned in an aesthetically pleasing way.
+        Return the output as a nicely formatted HTML string.
+        Font should be Roboto.
+        Do not say "Here's your" or "Sure".  Directly output the HTML string only.
+        Make sure there is nothing before or after the HTML string.  ONLY output the HTML string.
+        Do not explain what is the HTML code about.
+        Do not declare a new variable, output the HTML string only.
+        '''
+        html_out_list.append(chatgpt_chain.predict(human_input=prompt_input2))
+    os.makedirs("slide", exist_ok=True)
+    num = 1
+    for html_string in html_out_list:
+        print(html_string)
+        with open(f"slide/slide_{num}.html", "w") as file:
+            file.write(html_string)
+        num = num + 1
+    extract_path = 'slide'
+    # Create the directory to extract to if it doesn't exist
+    os.makedirs(extract_path, exist_ok=True)
+    # Configuration for imgkit
+    config = imgkit.config(wkhtmltoimage='C:\\Program Files\\wkhtmltopdf\\bin\\wkhtmltoimage.exe')
+    # The path to store the images
+    image_path = os.path.join(extract_path, 'images')
+    os.makedirs(image_path, exist_ok=True)
+    # Get the list of HTML files
+    html_files = sorted([f for f in os.listdir(extract_path) if f.endswith('.html')])
+    # Dictionary to store the file names and their corresponding images
+    file_images = {}
+    # Loop through the HTML files and convert them to images
+    for html_file in html_files:
+        # Full path of the HTML file
+        full_path = os.path.join(extract_path, html_file)
+        # Image file name
+        image_file = re.sub('.html$', '.jpg', html_file)
+        # Full path of the image file
+        full_image_path = os.path.join(image_path, image_file)
+        # Convert the HTML to an image
+        imgkit.from_file(full_path, full_image_path, config=config)
+        # Store the image file name
+        file_images[html_file] = image_file
+    print(file_images)
+    template = '''
+    {history}
+    {human_input}
+    '''
+    prompt = PromptTemplate(
+        input_variables=["history", "human_input"],
+        template=template
+    )
+    chatgpt_chain = LLMChain(
+        llm=ChatOpenAI(model="gpt-4", temperature=0.5,openai_api_key=OPENAI_API_KEY),
+        prompt=prompt,
+        verbose=True,
+        memory=ConversationBufferWindowMemory(k=10),
+    )
+    prompt_input3 = f'''
+    You are a world expert oracle that knows everything.
+    You are also an excellent teacher that explains everything succintly and simply like towards a kid.
+    You are an expert orator and presenter.
+    You are tasked to create a voiceover for 5 slides.
+    The slides are formatted in a python list of dictionary objects.
+    Each dictionary object is a slide.
+    {slide_str_list}
+    Input: Python list of dictionary objects
+    Output: Python list of string objects
+    The output list consists of string objects.
+    The voiceover text purpose is a speech presentation of the slide.
+    The voiceover text should be about the content of each slide but at the same time add additional information to make the presentation funny and engaging.
+    Each string is a voiceover text of each slide of the python dictionary.
+    Each voiceover string object should be around 80 words.
+    Make sure there is only 5 objects in the python list.
+    Do not declare a new variable, output the python list object only.
+    Make sure there is nothing before or after the python list object.  ONLY output the python list object.
+    Return the output in a python list format.
+    Do not say "Here's your" or "Sure".  Directly output python list of dictionary object only.
+    Do not declare a new variable, output the python list of dictionary object only.
+    '''
+    voiceover_list = []
+    voiceover_list=chatgpt_chain.predict(human_input=prompt_input3)
+    try:
+        voiceover_list = ast.literal_eval(voiceover_list)
+    except:
+        print("Already formatted.")
+    num = 1
+    for i in voiceover_list:
+        file_name = "slide/slide" + f"_{num}" + ".wav"
+        text_to_wav("en-US-Neural2-F",i, file_name)
+        print(file_name)
+        num = num + 1
+    # Get list of .jpg and .wav files from the correct directories
+    jpg_files = sorted(glob.glob("slide/images/*.jpg"))
+    wav_files = sorted(glob.glob("slide/*.wav"))
+    jpg_files, wav_files
+    from moviepy.editor import ImageSequenceClip, AudioFileClip, concatenate_videoclips
+    # Create a list to store the clips
+    clips = []
+    # Loop through each jpg and wav file
+    for jpg_file, wav_file in zip(jpg_files, wav_files):
+        # Load the audio file and get its duration
+        audio = AudioFileClip(wav_file)
+        duration = audio.duration
+        print(duration)
+        # Calculate the frame rate as the inverse of the duration
+        fps = 1 / duration if duration != 0 else 1
+        # Create a video clip from the image and set its duration and fps to match the audio
+        clip = ImageSequenceClip([jpg_file], durations=[duration], fps=fps)
+        # Set the audio of the clip to the wav file
+        clip = clip.set_audio(audio)
+        # Add the clip to the list of clips
+        clips.append(clip)
+    # Concatenate all clips into a single video
+    video = concatenate_videoclips(clips)
+    video_path = "slide/output.mp4"
+    # Write the video to a file
+    video.write_videofile(video_path)
+    return video_path
+# Gradio interface: a single text input is passed to prompt_to_video and the value it
+# returns (the video path) is exposed through a file output component.
+iface = gr.Interface(fn=prompt_to_video, inputs="text", outputs=["file"])
+# Launches the interface as soon as this module is executed.
+iface.launch()