"""Streamlit app that answers questions about a YouTube video.

The video's transcript is downloaded (translated to English when needed),
split into chunks, embedded into a FAISS index kept in the Streamlit session,
and the chunks most similar to each question are sent to OpenAI as context.
"""

# Import the libraries
import os
import re

import openai
import streamlit as st
from langchain.embeddings import AwaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from youtube_transcript_api import YouTubeTranscriptApi

# Matches the video id in the common YouTube URL shapes:
#   ...watch?v=ID, youtu.be/ID, /shorts/ID, /embed/ID, /live/ID
_VIDEO_ID_PATTERN = re.compile(
    r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)([^?&#/]+)"
)


def _is_english(transcript):
    """Return True when the transcript's language code is English (en, en-US...)."""
    return transcript.language_code.split("-")[0] == "en"


def get_transcript(url):
    """Return the English transcript of a YouTube video as one string.

    Args:
        url: YouTube video URL (``watch?v=``, ``youtu.be/``, ``/shorts/``,
            ``/embed/`` or ``/live/`` form).

    Returns:
        The subtitle texts joined with single spaces.

    Raises:
        ValueError: if no video id can be found in ``url`` or the video has
            no transcript that is English or translatable to English.
        Exception: whatever ``youtube_transcript_api`` raises when the
            transcripts cannot be listed or fetched.
    """
    match = _VIDEO_ID_PATTERN.search(url or "")
    if match is None:
        raise ValueError(f"Could not find a YouTube video id in URL: {url!r}")
    video_id = match.group(1)

    # Retrieve the available transcripts
    transcripts = list(YouTubeTranscriptApi.list_transcripts(video_id))

    # Prefer a transcript that is already in English: no translation needed.
    english = next((t for t in transcripts if _is_english(t)), None)
    if english is not None:
        subtitles = english.fetch()
    else:
        # Otherwise translate only ONE transcript instead of fetching all.
        translatable = next((t for t in transcripts if t.is_translatable), None)
        if translatable is None:
            raise ValueError(
                "No English or translatable transcript is available for "
                f"video {video_id!r}"
            )
        subtitles = translatable.translate("en").fetch()

    # Join the subtitle fragments into a single text
    return " ".join(sub["text"] for sub in subtitles)


embeddings = AwaEmbeddings()

text_splitter = RecursiveCharacterTextSplitter(
    # Chunk size/overlap are measured in characters (length_function=len).
    chunk_size=1500,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)


def chat(question):
    """Answer ``question`` using the transcript index stored in the session.

    Args:
        question: the user's question about the loaded video.

    Returns:
        The assistant's answer text, or an error message string when no
        transcript database has been generated in this session yet.
    """
    if "database" not in st.session_state:
        return "Error, not generated database"

    docs = st.session_state.database.similarity_search(question)
    # Send the chunk texts, not the repr() of the Document objects.
    context = "\n\n".join(doc.page_content for doc in docs)

    prompt = [
        {
            "role": "system",
            "content": (
                "You are my Youtube Asisstant. I will pass you texts from a "
                "Youtube Video Transcrip and I need you to use them to answer "
                "my question from the Youtube Video. \n"
                "Please do not invent any information, and I am asking about "
                "information in the Youtube Video."
            ),
        },
        {"role": "user", "content": f"Context:{context}"},
        {"role": "user", "content": f"Question:{question}"},
    ]

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        messages=prompt,
        temperature=0,
    )
    return response["choices"][0]["message"]["content"]


# ----------------------------- APP STREAMLIT -----------------------------

st.title("Ask Question Youtube Videos")

with st.sidebar:
    if "api" not in st.session_state:
        api_key = st.text_input(
            label="api",
            placeholder="API Key from OpenAI",
            label_visibility="hidden",
        )
        # Distinct keys keep the two "Save" buttons from sharing a widget
        # identity, so a click on one is never attributed to the other.
        if st.button(label="Save", key="save_api"):
            api_key = api_key.strip()
            if api_key:
                os.environ["OPENAI_API_KEY"] = api_key
                openai.api_key = api_key
                st.session_state["api"] = api_key
            else:
                st.error("Please enter a non-empty OpenAI API key.")
    else:
        url = st.text_input(
            label="url",
            placeholder="Youtube Video URL",
            label_visibility="hidden",
        )
        if st.button(label="Save", key="save_url"):
            try:
                info = get_transcript(url)
                texts = text_splitter.create_documents([info])
                database = FAISS.from_documents(texts, embeddings)
            except Exception as error:  # UI boundary: report, don't crash
                st.error(f"Could not load the video transcript: {error}")
            else:
                # Commit state only after the index was built, so the chat
                # view never appears without a database behind it.
                st.session_state["database"] = database
                st.session_state["url"] = url

if "api" not in st.session_state:
    st.write(
        "Please, introduce your OpenAI API key to ask questions to any YouTube Video"
    )
elif "url" not in st.session_state:
    st.write("Please, introduce link URL from the YouTube Video")
else:
    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Accept user input
    if prompt := st.chat_input("What is up?"):
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})
        # Display user message in chat message container
        with st.chat_message("user"):
            st.markdown(prompt)

        # Get response from the custom chat function
        response = chat(prompt)

        # Display assistant response in chat message container
        with st.chat_message("assistant"):
            st.markdown(response)
        st.session_state.messages.append({"role": "assistant", "content": response})