rzanoli committed
Commit 36e28d7
Parent: 3f5c8b6

Add application file

Files changed (2)
  1. app.py +115 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,115 @@
+ import os
+ from langchain.prompts import PromptTemplate
+ from langchain.llms import CTransformers
+ from langchain.vectorstores import Chroma
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
+ from langchain.chains import RetrievalQA
+ import gradio as gr
+
+ # Mount Google Drive (only needed when running in Colab)
+ # from google.colab import drive
+ # drive.mount('/content/drive/')
+ # !ls /content/drive/My\ Drive/stores/enron_cosine/
+
+ # The model used to generate answers to user queries from the content retrieved from the vector store.
+ local_llm = "TheBloke/zephyr-7B-beta-GGUF"
+
+ config = {
+     # Explicitly set the context window and the generation limits
+     "context_length": 4096,
+     "max_new_tokens": 1024,
+     "repetition_penalty": 1.1,
+     "temperature": 0.1,
+     "top_k": 50,
+     "top_p": 0.9,
+     "stream": True,
+     # Use half of the available CPU cores for inference
+     "threads": int(os.cpu_count() / 2),
+ }
+
+ llm_init = CTransformers(model=local_llm, model_type="mistral", lib="avx2", config=config)
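
The GGUF repository above hosts several quantization variants. If ctransformers does not pick one automatically, a specific file can be pinned via model_file; a minimal sketch, assuming the Q4_K_M variant is present in the repo:

    llm_init = CTransformers(
        model=local_llm,
        model_file="zephyr-7b-beta.Q4_K_M.gguf",  # assumed filename, check the repo's file list
        model_type="mistral",
        lib="avx2",
        config=config,
    )
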
31
+
32
+ prompt_template = """Use the following piece of information to answers the question asked by the user.
33
+ Don't try to make up the answer if you don't know the answer, simply say I don't know.
34
+
35
+ Context: {context}
36
+ Question: {question}
37
+
38
+ Only helpful answer below.
39
+ Helpful answer:
40
+ """
41
+
42
+ # The model to create the embeddings of the user queries
43
+ model_name = "BAAI/bge-large-en"
44
+ model_kwargs = {"device": "cpu"}
45
+ encode_kwargs = {"normalize_embeddings": False}
46
+
47
+ embeddings = HuggingFaceBgeEmbeddings(
48
+ model_name=model_name,
49
+ model_kwargs=model_kwargs,
50
+ encode_kwargs=encode_kwargs,
51
+ )
52
+
53
+ prompt = PromptTemplate(
54
+ template=prompt_template, input_variables=["context", "question"]
55
+ )
56
+
57
+ load_vector_store = Chroma(
58
+ persist_directory="./stores/enron_cosine", embedding_function=embeddings
59
+ )
60
+
61
+ retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})
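
The store under ./stores/enron_cosine is only loaded here, not built. A minimal sketch of how such a store could be created beforehand, assuming plain-text email dumps in ./enron_emails and the splitter settings shown (none of this is part of this commit):

    from langchain.document_loaders import DirectoryLoader, TextLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    # Assumed corpus location and chunking parameters
    docs = DirectoryLoader("./enron_emails", loader_cls=TextLoader).load()
    chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(docs)

    store = Chroma.from_documents(
        chunks,
        embeddings,
        collection_metadata={"hnsw:space": "cosine"},  # cosine distance, matching the directory name
        persist_directory="./stores/enron_cosine",
    )
    store.persist()
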
62
+
63
+ #query = "In what context is mentioned Natural Gas Storage Overview?"
64
+ #semantic_search = retriever.get_relevant_documents(query)
65
+ #print(semantic_search)
66
+
67
+ # chain_type_kwargs = {"prompt": prompt}
68
+
69
+ # qa = RetrievalQA.from_chain_type(
70
+ # llm=llm_init,
71
+ # chain_type="stuff",
72
+ # retriever=retriever,
73
+ # verbose=True,
74
+ # chain_type_kwargs=chain_type_kwargs,
75
+ # return_source_documents=True,
76
+ # )
77
+
78
+
79
+ sample_query = []
80
+
81
+ def get_response(input):
82
+ query = input
83
+ chain_type_kwargs = {"prompt": prompt}
84
+ qa = RetrievalQA.from_chain_type(
85
+ llm=llm_init,
86
+ chain_type="stuff",
87
+ retriever=retriever,
88
+ verbose=True,
89
+ chain_type_kwargs=chain_type_kwargs,
90
+ return_source_documents=True,
91
+ )
92
+ response = qa(query)
93
+ print("Response:", response)
94
+ return response
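
Since return_source_documents=True makes the chain return a dict rather than a bare string, the retrieved passages can also be surfaced to the user. A minimal sketch (the key names follow LangChain's RetrievalQA output; the output formatting is an assumption):

    def get_response_with_sources(user_input):
        # Same chain as above, but also report which email chunks were retrieved
        qa = RetrievalQA.from_chain_type(
            llm=llm_init,
            chain_type="stuff",
            retriever=retriever,
            chain_type_kwargs={"prompt": prompt},
            return_source_documents=True,
        )
        response = qa(user_input)
        sources = "\n".join(doc.page_content[:200] for doc in response["source_documents"])
        return f"{response['result']}\n\nSources:\n{sources}"
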
95
+
96
+ input = gr.Text(
97
+ label="Query",
98
+ show_label=True,
99
+ max_lines=2,
100
+ container=False,
101
+ placeholder="Enter your question",
102
+ )
103
+
104
+ gIface = gr.Interface(
105
+ fn=get_response,
106
+ inputs=input,
107
+ outputs="text",
108
+ title="Enron emails RAG AI",
109
+ description="RAG demo using Zephyr 7B Beta and Langchain",
110
+ examples=sample_query,
111
+ allow_flagging="never",
112
+ )
113
+
114
+ gIface.launch()
115
+
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ torch
+ ctransformers
+ sentence_transformers
+ chromadb
+ langchain
+ pypdf
+ gradio