import os
import time

import gradio as gr
import openai
from langdetect import detect
from gtts import gTTS
from pdfminer.high_level import extract_text

# any vector server should work, trying pinecone first
import pinecone

# langchain part
import spacy
import tiktoken
from langchain.llms import OpenAI
from langchain.text_splitter import SpacyTextSplitter
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone

openai.api_key = os.environ['OPENAI_API_KEY']
pinecone_key = os.environ['PINECONE_API_KEY']
pinecone_environment = 'us-west1-gcp-free'

# single-user credentials pulled from environment secrets
user_db = {os.environ['username1']: os.environ['password1']}

# shared chat history; the system message sets the assistant's role
messages = [{"role": "system", "content": 'You are a helpful assistant.'}]

# load up spacy
nlp = spacy.load("en_core_web_sm")


def init_pinecone():
    pinecone.init(api_key=pinecone_key, environment=pinecone_environment)


def process_file(index_name, dir):
    """Split uploaded documents and index them in a new Pinecone index."""
    init_pinecone()
    # using OpenAI embeddings, hence dimension = 1536
    pinecone.create_index(index_name, dimension=1536, metric="cosine")
    #time.sleep(5)
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])
    splitter = SpacyTextSplitter(chunk_size=1000, chunk_overlap=200)
    for doc in dir:
        loader = TextLoader(doc.name, encoding='utf8')
        content = loader.load()
        split_text = splitter.split_documents(content)
        for text in split_text:
            Pinecone.from_documents([text], embeddings, index_name=index_name)
    #pipeline='zh_core_web_sm'
    # return a status string so the Gradio "text" output has something to show
    return "Indexed files into: " + index_name


def list_pinecone():
    init_pinecone()
    return pinecone.list_indexes()


def show_pinecone(index_name):
    init_pinecone()
    #return pinecone.describe_index(index_name)
    index = pinecone.Index(index_name)
    stats = index.describe_index_stats()
    return stats


def delete_pinecone(index_name):
    init_pinecone()
    pinecone.delete_index(index_name)


def roleChoice(role):
    global messages
    messages = [{"role": "system", "content": role}]
    return "role: " + role


def talk2file(index_name, text):
    global messages
    # same retrieval as fileSearch, but the result is fed to ChatGPT
    init_pinecone()
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])
    docsearch = Pinecone.from_existing_index(index_name, embeddings)
    # search with the user's question first; the augmented prompt is built
    # afterwards (the original searched with `prompt` before assigning it)
    docs = docsearch.similarity_search(text)
    prompt = text + ", based on the following text: \n\n" + docs[0].page_content
    messages.append({"role": "user", "content": prompt})
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
    system_message = response["choices"][0]["message"]
    messages.append(system_message)
    chats = ""
    for msg in messages:
        if msg['role'] != 'system':
            chats += msg['role'] + ": " + msg['content'] + "\n\n"
    return chats


def fileSearch(index_name, prompt):
    global messages
    init_pinecone()
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])
    docsearch = Pinecone.from_existing_index(index_name, embeddings)
    docs = docsearch.similarity_search(prompt)
    return "Content in file: \n\n" + docs[0].page_content + "\n\n"


def clear():
    global messages
    messages = [{"role": "system", "content": 'You are a helpful technology assistant.'}]


def show():
    global messages
    chats = ""
    for msg in messages:
        if msg['role'] != 'system':
            chats += msg['role'] + ": " + msg['content'] + "\n\n"
    return chats
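# textGPT powers the "chatGPT" tab below but was missing from this listing;
# a minimal sketch, assuming the same chat pattern as talk2file: append the
# user message, call gpt-3.5-turbo, and return the running transcript.
def textGPT(text):
    global messages
    messages.append({"role": "user", "content": text})
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
    system_message = response["choices"][0]["message"]
    messages.append(system_message)
    chats = ""
    for msg in messages:
        if msg['role'] != 'system':
            chats += msg['role'] + ": " + msg['content'] + "\n\n"
    return chats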
with gr.Blocks() as chatHistory:
    gr.Markdown("Click the Clear button below to remove all the chat history.")
    clear_btn = gr.Button("Clear")
    clear_btn.click(fn=clear, inputs=None, outputs=None, queue=False)
    gr.Markdown("Click the Display button below to show all the chat history.")
    show_out = gr.Textbox()
    show_btn = gr.Button("Display")
    show_btn.click(fn=show, inputs=None, outputs=show_out, queue=False)

# pinecone tools
with gr.Blocks() as pinecone_tools:
    pinecone_list = gr.Textbox()
    # buttons renamed from "list"/"delete"/"show" so they don't shadow the
    # builtin list or the show() function defined above
    list_btn = gr.Button(value="List")
    list_btn.click(fn=list_pinecone, inputs=None, outputs=pinecone_list, queue=False)
    pinecone_delete_name = gr.Textbox()
    delete_btn = gr.Button(value="Delete")
    delete_btn.click(fn=delete_pinecone, inputs=pinecone_delete_name, outputs=None, queue=False)
    pinecone_show_name = gr.Textbox()
    pinecone_info = gr.Textbox()
    show_index_btn = gr.Button(value="Show")
    show_index_btn.click(fn=show_pinecone, inputs=pinecone_show_name, outputs=pinecone_info, queue=False)

role = gr.Interface(fn=roleChoice, inputs="text", outputs="text",
                    description="Choose your GPT role, e.g. You are a helpful technology assistant. "
                                "You are an IT architect. You are a developer relations consultant. "
                                "You are a machine learning engineer. You are a senior C++ developer.")
text = gr.Interface(fn=textGPT, inputs="text", outputs="text")
vector_server = gr.Interface(fn=process_file, inputs=["text", gr.inputs.File(file_count="directory")], outputs="text")
#audio = gr.Interface(fn=audioGPT, inputs=gr.Audio(source="microphone", type="filepath"), outputs="text")
#siri = gr.Interface(fn=siriGPT, inputs=gr.Audio(source="microphone", type="filepath"), outputs="audio")
file = gr.Interface(fn=fileSearch, inputs=["text", "text"], outputs="text",
                    description="Enter index name and prompt")

demo = gr.TabbedInterface([role, text, file, chatHistory, vector_server, pinecone_tools],
                          ["roleChoice", "chatGPT", "FileSearch", "ChatHistory", "VectorServer", "PineconeTools"])

if __name__ == "__main__":
    demo.launch(enable_queue=False,
                auth=lambda u, p: user_db.get(u) == p,
                auth_message="This is not designed to be used publicly as it links to a "
                             "personal OpenAI API key. However, you can copy my code and "
                             "create your own multi-functional ChatGPT with your unique ID "
                             "and password by utilizing the 'Repository secrets' feature "
                             "in Hugging Face.")
    #demo.launch()