Spaces:
Runtime error
Runtime error
commit
Browse files
app.py
ADDED
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_community.vectorstores import Qdrant
|
2 |
+
from langchain_together import Together
|
3 |
+
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
4 |
+
from qdrant_client import QdrantClient
|
5 |
+
from langchain_core.prompts import PromptTemplate
|
6 |
+
import os
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
|
9 |
+
from langchain_community.vectorstores import Qdrant
|
10 |
+
from langchain.embeddings import HuggingFaceBgeEmbeddings
|
11 |
+
from langchain.docstore.document import Document
|
12 |
+
import pandas as pd
|
13 |
+
|
14 |
+
# formatting the data for ingestion
|
15 |
+
# Read the product catalogue; empty cells are replaced by "" so that every
# field can be formatted as text further down.
all_prods_df = pd.read_csv("data/cleaned_CSVIndian10000.csv").fillna("")

# One dict per CSV row, keyed by the DataFrame index.
product_metadata = all_prods_df.to_dict(orient="index")

# The row dicts themselves are what gets attached to each document.
metadatas = list(product_metadata.values())

# Text that is embedded for each product: name, newline, short description.
texts = [str(row['name']) + "\n" + str(row['product_desc']) for row in metadatas]

# The complete row is kept under the "source" metadata key so it can be read
# back from search results.
docs = [
    Document(page_content=text, metadata={"source": row})
    for text, row in zip(texts, metadatas)
]

print("Data loaded.........")
|
27 |
+
|
28 |
+
|
29 |
+
# load the embedding model
|
30 |
+
# Embedding model settings: BGE-large (English) on CPU, with the
# normalize_embeddings option switched on for encoding.
model_name = "BAAI/bge-large-en"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)

embeddings = HuggingFaceBgeEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

print("Embedding model loaded.........")
|
41 |
+
|
42 |
+
|
43 |
+
# load the vector store
|
44 |
+
# url="http://localhost:6333"
|
45 |
+
collection_name = "shopintel100v3"

# Embed every document and index it in a Qdrant collection.
# NOTE(review): location=":memory:" presumably selects qdrant-client's
# in-process mode, meaning the whole catalogue is re-embedded on every
# start-up -- confirm this is intended for a 10k-row catalogue on CPU.
vector_store = Qdrant.from_documents(
    docs,
    embeddings,
    collection_name=collection_name,
    location=":memory:",
    prefer_grpc=False,
)

print("Vector store loaded.........")
|
56 |
+
|
57 |
+
|
58 |
+
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# API key for the Together client; None when the variable is not set.
TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
# Never echo the secret itself: stdout ends up in the (often public) app logs.
# Report only whether a key was found.
print("api key found: ", TOGETHER_API_KEY is not None)
|
62 |
+
|
63 |
+
|
64 |
+
# load the embedding model
|
65 |
+
# model_name = "BAAI/bge-large-en"
|
66 |
+
# model_kwargs = {"device": "cpu"}
|
67 |
+
# encode_kwargs = {"normalize_embeddings": True}
|
68 |
+
|
69 |
+
# embeddings = HuggingFaceBgeEmbeddings(
|
70 |
+
# model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
|
71 |
+
# )
|
72 |
+
# print("embeddings loaded.............")
|
73 |
+
|
74 |
+
# url = "http://localhost:6333"
|
75 |
+
# collection_name = "shopintel100v3"
|
76 |
+
|
77 |
+
# client = QdrantClient(url=url, prefer_grpc=False)
|
78 |
+
|
79 |
+
# vector_store = Qdrant(
|
80 |
+
# client=client,
|
81 |
+
# collection_name=collection_name,
|
82 |
+
# embeddings=embeddings
|
83 |
+
# )
|
84 |
+
|
85 |
+
# Text-generation client used to answer the customer's questions.
llm = Together(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0.2,
    max_tokens=5000,
    top_k=50,
    together_api_key=TOGETHER_API_KEY,
)

# Status line in the same "<step> loaded" style as the earlier steps. The old
# message claimed Qdrant embeddings were loaded from docker, which this script
# no longer does (that code path is commented out).
print("LLM loaded.........")
|
94 |
+
|
95 |
+
|
96 |
+
# query = "ASUS VivoBook 15 (2021)"
|
97 |
+
# result = vector_store.similarity_search_with_score(query=query, k=5)
|
98 |
+
|
99 |
+
# for i in result:
|
100 |
+
# doc, score = i
|
101 |
+
# print({"score": score, "content": doc.page_content, "metadata": doc.metadata["source"]})
|
102 |
+
# print("---------------------------------")
|
103 |
+
|
104 |
+
# function to retrieve products from qdrant
|
105 |
+
|
106 |
+
def retrieve_product(user_input, vector_store, k=10):
    """Look up the ``k`` entries in ``vector_store`` closest to ``user_input``.

    Args:
        user_input: Query text to search for.
        vector_store: Object exposing ``similarity_search_with_score``.
        k: Number of matches to request.

    Returns:
        Whatever ``vector_store.similarity_search_with_score`` returns; the
        caller in this file reads each entry as a ``(document, score)`` pair.
    """
    return vector_store.similarity_search_with_score(query=user_input, k=k)
|
113 |
+
|
114 |
+
|
115 |
+
# function to create context from user query
|
116 |
+
|
117 |
+
def create_context(user_input, vector_store, k=10):
    """Build the product context text that is injected into the LLM prompt.

    Args:
        user_input: The customer's query text.
        vector_store: Store handed through to ``retrieve_product``.
        k: Number of products to retrieve. Defaults to 10, the value
            ``retrieve_product`` used when this function passed no ``k``.

    Returns:
        A string with one numbered section per retrieved product, or an
        empty string when nothing was retrieved.
    """
    result = retrieve_product(user_input, vector_store, k=k)

    sections = []
    for index, (document, _score) in enumerate(result, start=1):
        # The full product row is stored under the "source" metadata key.
        product_metadata = document.metadata["source"]

        sections.append(
            "\n"
            f"* Product {index} -\n"
            f"- Product name : {product_metadata['name']}\n"
            f"- Product price: {product_metadata['discount_price']}\n"
            f"- Brief description of the product: {product_metadata['product_desc']}\n"
            f"- Detailed description of the product: {product_metadata['about_this_item']}\n"
            f"- Rating value (1.0 - 5.0): {product_metadata['ratings']}\n"
            f"- Overall review: {product_metadata['overall_review']}\n"
            "\n"
            "\n"
        )

    # Join once instead of growing a string with += inside the loop.
    return "".join(sections)
|
141 |
+
|
142 |
+
|
143 |
+
|
144 |
+
# prompt template for the mistral model
|
145 |
+
|
146 |
+
# The prompt must not state a fixed product count: the number of products in
# {context} depends on how many results the retrieval step returns (10 by
# default), so a hard-coded "5" would misinform the model.
template = """You are a friendly, conversational AI ecommerce assistant. The context includes a list of ecommerce products.
Use only the following context, to find the answer to the questions from the customer.

Its very important that you follow the below instructions.
-Dont use general knowledge to answer the question
-If you dont find the answer from the context or the question is not related to the context, just say that you don't know the answer.
-By any chance the customer should not know you are referring to a context.


Context:

{context}


Question:
{question}


Helpful Answer:"""
|
165 |
+
|
166 |
+
|
167 |
+
import random
|
168 |
+
import gradio as gr
|
169 |
+
|
170 |
+
chat_history = []
|
171 |
+
def respond(message, chat_history):
    """Answer one chat message and record the exchange in the history.

    Args:
        message: Text submitted by the user.
        chat_history: List of ``(user_message, bot_reply)`` tuples; ``None``
            is treated as an empty history.

    Returns:
        A ``("", chat_history)`` tuple: an empty string for the message
        output and the history extended by this exchange.
    """
    if chat_history is None:
        # Without this guard the fallback path below would fail as well.
        chat_history = []

    try:
        context = create_context(message, vector_store)
        print("context:-------------------------\n", context)
        prompt = PromptTemplate(template=template, input_variables=["context", "question"])
        prompt_formatted_str = prompt.format(context=context, question=message)
        output = llm.invoke(prompt_formatted_str)
    except Exception as e:
        # UI boundary: a retrieval or LLM failure must not crash the callback.
        # Log it and show the user a generic apology instead.
        print("Error:", e)
        output = random.choice([
            "Sorry, I encountered an error while processing your request.",
            "Oops, something went wrong. Please try again later.",
            "I'm having trouble understanding that. Can you please rephrase?",
            "It seems there was an issue. Let's try something else.",
        ])

    chat_history.append((message, output))
    return "", chat_history
|
197 |
+
|
198 |
+
# Define the Gradio interface
|
199 |
+
# chatbot = gr.Chatbot(height=450)
|
200 |
+
# msg = gr.Textbox(label="What would you like to know?")
|
201 |
+
# gr.Interface(
|
202 |
+
# fn=respond,
|
203 |
+
# inputs=msg,
|
204 |
+
# outputs=gr.Textbox(label="Response"),
|
205 |
+
# title="Conversational AI Chatbot",
|
206 |
+
# ).launch(
|
207 |
+
# share=True,
|
208 |
+
# )
|
209 |
+
|
210 |
+
# # Define Gradio components
|
211 |
+
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=450)
    msg = gr.Textbox(label="What would you like to know?")
    btn = gr.Button("Submit")
    # Resets both the textbox and the conversation shown in the chatbot.
    clear = gr.ClearButton(value="Clear Console", components=[msg, chatbot])

    # Clicking "Submit" sends the message and the current conversation to
    # respond(); its two return values go back to the textbox and the chatbot.
    btn.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])

    # Submitting the textbox itself is wired to the same handler.
    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])

# Shut down any Gradio apps already running in this process, then start ours.
gr.close_all()

demo.launch()
|