jozzy commited on
Commit
5cbeaec
0 Parent(s):

Duplicate from jozzy/langchain

Browse files
Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +14 -0
  3. app.py +208 -0
  4. requirements.txt +12 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Langchain
3
+ emoji: 🚀
4
+ colorFrom: gray
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 3.29.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ duplicated_from: jozzy/langchain
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import gradio as gr
4
+ import openai
5
+
6
+ from langdetect import detect
7
+ from gtts import gTTS
8
+ from pdfminer.high_level import extract_text
9
+
10
+ #any vector server should work, trying pinecone first
11
+ import pinecone
12
+
13
+ #langchain part
14
+ import spacy
15
+ import tiktoken
16
+ from langchain.llms import OpenAI
17
+ from langchain.text_splitter import SpacyTextSplitter
18
+ from langchain.document_loaders import TextLoader
19
+ from langchain.document_loaders import DirectoryLoader
20
+ from langchain.indexes import VectorstoreIndexCreator
21
+ from langchain.embeddings.openai import OpenAIEmbeddings
22
+ from langchain.vectorstores import Pinecone
23
+
24
+
25
# --- Runtime configuration (module-level side effects) ---

# Secrets come from the environment (Hugging Face "Repository secrets").
openai.api_key = os.environ['OPENAI_API_KEY']
pinecone_key = os.environ['PINECONE_API_KEY']
# Pinecone region the indexes live in.
pinecone_environment='us-west1-gcp-free'


# Single {username: password} table consumed by the auth callback at launch.
user_db = {os.environ['username1']: os.environ['password1']}

# Shared chat transcript used by the handlers below; element 0 is always the
# system prompt.
messages = [{"role": "system", "content": 'You are a helpful assistant.'}]

# Load up spaCy's small English pipeline (SpacyTextSplitter relies on spaCy).
nlp = spacy.load("en_core_web_sm")
37
+
38
+
39
+
def init_pinecone():
    """Connect the global pinecone client using the configured key and region."""
    pinecone.init(api_key=pinecone_key, environment=pinecone_environment)
43
+
44
+
45
+
46
+
47
def process_file(index_name, dir):
    """Embed and index every uploaded text file into a Pinecone index.

    Args:
        index_name: Name of the Pinecone index to create (if missing) and fill.
        dir: Iterable of uploaded file objects (each exposes a ``.name`` path),
            as produced by gradio's directory File input.

    Returns:
        None. Side effects: may create the index, then upserts OpenAI
        embeddings of ~1000-char chunks of each document.
    """
    init_pinecone()

    # Using OpenAI embeddings, hence dimension 1536. Only create the index
    # when it does not already exist: create_index raises on a duplicate
    # name, which previously made every re-run of this tab fail.
    if index_name not in pinecone.list_indexes():
        pinecone.create_index(index_name, dimension=1536, metric="cosine")

    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])
    splitter = SpacyTextSplitter(chunk_size=1000, chunk_overlap=200)

    for doc in dir:
        loader = TextLoader(doc.name, encoding='utf8')
        content = loader.load()
        split_text = splitter.split_documents(content)
        # Upsert all chunks of this document in one call instead of one
        # network round-trip (and index re-resolution) per chunk.
        if split_text:
            Pinecone.from_documents(split_text, embeddings, index_name=index_name)
    return
69
+
70
+
71
def list_pinecone():
    """Return the names of every index in the configured Pinecone project."""
    init_pinecone()
    index_names = pinecone.list_indexes()
    return index_names
74
+
75
+
76
def show_pinecone(index_name):
    """Return vector statistics for the named Pinecone index."""
    init_pinecone()
    return pinecone.Index(index_name).describe_index_stats()
82
+
83
+
84
+
85
def delete_pinecone(index_name):
    """Drop the named Pinecone index. Irreversible."""
    init_pinecone()
    pinecone.delete_index(index_name)
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
def roleChoice(role):
    """Restart the conversation with *role* as the new system prompt.

    Returns a short confirmation string echoing the chosen role.
    """
    global messages
    fresh_system = {"role": "system", "content": role}
    messages = [fresh_system]
    return "role:" + role
101
+
102
+
103
+
104
+
105
+
106
+
107
def talk2file(index_name, text):
    """Answer *text* grounded in the best-matching chunk of the given index.

    Appends the augmented user prompt and the model's reply to the global
    transcript, then returns the rendered non-system chat history.
    """
    global messages

    # Same retrieval as fileSearch: embed the query and pull similar chunks.
    init_pinecone()
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])
    docsearch = Pinecone.from_existing_index(index_name, embeddings)
    docs = docsearch.similarity_search(text)

    # Ground the question in the top hit ("according to the following
    # text:" in Chinese).
    prompt = text + ", 根据以下文本: \n\n" + docs[0].page_content
    messages.append({"role": "user", "content": prompt})

    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)

    reply = response["choices"][0]["message"]
    messages.append(reply)

    transcript = ""
    for entry in messages:
        if entry['role'] == 'system':
            continue
        transcript += entry['role'] + ": " + entry['content'] + "\n\n"

    return transcript
131
+
132
+
133
+
134
+
135
+
136
def fileSearch(index_name, prompt):
    """Return the single most similar stored chunk for *prompt*."""
    global messages

    init_pinecone()
    embedder = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])
    store = Pinecone.from_existing_index(index_name, embedder)
    hits = store.similarity_search(prompt)
    best = hits[0].page_content

    return "Content in file: \n\n" + best + "\n\n"
145
+
146
+
147
+
148
def clear():
    """Reset the transcript to a single fresh system prompt."""
    global messages
    messages = [{"role": "system", "content": 'You are a helpful technology assistant.'}]
152
+
153
def show():
    """Render the chat history as text, omitting the system prompt."""
    global messages
    rendered = [m['role'] + ": " + m['content'] + "\n\n"
                for m in messages
                if m['role'] != 'system']
    return "".join(rendered)
161
+
162
+
163
# Chat-history tab: clear or display the shared transcript.
with gr.Blocks() as chatHistory:
    gr.Markdown("Click the Clear button below to remove all the chat history.")
    clear_button = gr.Button("Clear")
    clear_button.click(fn=clear, inputs=None, outputs=None, queue=False)

    gr.Markdown("Click the Display button below to show all the chat history.")
    history_box = gr.Textbox()
    display_button = gr.Button("Display")
    display_button.click(fn=show, inputs=None, outputs=history_box, queue=False)
172
+
173
+
174
# Pinecone maintenance tools: list / delete / inspect indexes.
with gr.Blocks() as pinecone_tools:
    pinecone_list = gr.Textbox()
    # Renamed from `list`: the old module-level name shadowed the builtin list().
    list_btn = gr.Button(value="List")
    list_btn.click(fn=list_pinecone, inputs=None, outputs=pinecone_list, queue=False)

    pinecone_delete_name = gr.Textbox()
    delete_btn = gr.Button(value="Delete")
    delete_btn.click(fn=delete_pinecone, inputs=pinecone_delete_name, outputs=None, queue=False)

    pinecone_show_name = gr.Textbox()
    pinecone_info = gr.Textbox()
    # Renamed from `show`: the old name clobbered the show() chat handler.
    show_btn = gr.Button(value="Show")
    show_btn.click(fn=show_pinecone, inputs=pinecone_show_name, outputs=pinecone_info, queue=False)
188
+
189
+
190
+
191
+
192
+
193
+
194
+
195
# Tab definitions: each gr.Interface wraps one handler defined above.
# NOTE: the mojibake "你是一���开发者" is repaired to "你是一位开发者",
# matching the "你是一位..." pattern used throughout this description.
role = gr.Interface(fn=roleChoice, inputs="text", outputs="text", description = "Choose your GPT roles, e.g. You are a helpful technology assistant. 你是一位 IT 架构师。 你是一位开发者关系顾问。你是一位机器学习工程师。你是一位高级 C++ 开发人员 ")
text = gr.Interface(fn=talk2file, inputs=["text", "text"], outputs="text")

# gr.File replaces the deprecated gr.inputs.File namespace (removed in gradio 4.x).
vector_server = gr.Interface(fn=process_file, inputs=["text", gr.File(file_count="directory")], outputs="text")

file = gr.Interface(fn=fileSearch, inputs=["text", "text"], outputs="text", description = "Enter file name and prompt")
demo = gr.TabbedInterface([role, text, file, vector_server, pinecone_tools, chatHistory], [ "roleChoice", "Talk2File", "FileSearch", "VectorServer", "PineconeTools", "ChatHistory"])

if __name__ == "__main__":
    # Single-user HTTP auth backed by the env-provided credential pair.
    demo.launch(enable_queue=False, auth=lambda u, p: user_db.get(u) == p,
                auth_message="This is not designed to be used publicly as it links to a personal openAI API. However, you can copy my code and create your own multi-functional ChatGPT with your unique ID and password by utilizing the 'Repository secrets' feature in huggingface.")
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ https://gradio-builds.s3.amazonaws.com/http-auth/attempt-1/gradio-3.18.1b2-py3-none-any.whl
2
+ openai
3
+ ffmpeg
4
+ langdetect
5
+ gtts
6
+ pdfminer
7
+ pdfminer.six
8
+ pinecone-client
9
+ langchain
10
+ spacy
11
+ https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
12
+ tiktoken