Spaces:
Build error
Build error
add metadata prompt, fix imports, add chroma
Browse files
- app.py +9 -6
- globalvars.py +2 -0
- requirements.txt +1 -0
app.py
CHANGED
@@ -10,10 +10,11 @@ from torch import Tensor
|
|
10 |
from transformers import AutoTokenizer, AutoModel
|
11 |
from huggingface_hub import InferenceClient
|
12 |
from openai import OpenAI
|
13 |
-
from langchain_community.document_loaders import UnstructuredFileLoader
|
|
|
14 |
from chromadb import Documents, EmbeddingFunction, Embeddings
|
15 |
from chromadb.config import Settings
|
16 |
-
from chromadb import HttpClient
|
17 |
from utils import load_env_variables, parse_and_route
|
18 |
from globalvars import API_BASE, intention_prompt, tasks, system_message, model_name
|
19 |
from dotenv import load_dotenv
|
@@ -97,7 +98,7 @@ def load_documents(file_path: str, mode: str = "elements"):
|
|
97 |
return [doc.page_content for doc in docs]
|
98 |
|
99 |
def initialize_chroma(collection_name: str, embedding_function: MyEmbeddingFunction):
|
100 |
-
client = HttpClient(host='localhost', port=8000, settings = Settings(allow_reset=True, anonymized_telemetry=False))
|
101 |
client.reset() # resets the database
|
102 |
collection = client.create_collection(collection_name)
|
103 |
return client, collection
|
@@ -110,7 +111,9 @@ def query_chroma(client, collection_name: str, query_text: str, embedding_functi
|
|
110 |
db = Chroma(client=client, collection_name=collection_name, embedding_function=embedding_function)
|
111 |
result_docs = db.similarity_search(query_text)
|
112 |
return result_docs
|
113 |
-
|
|
|
|
|
114 |
# Initialize clients
|
115 |
intention_client = OpenAI(api_key=yi_token, base_url=API_BASE)
|
116 |
embedding_generator = EmbeddingGenerator(model_name=model_name, token=hf_token, intention_client=intention_client)
|
@@ -149,11 +152,11 @@ def upload_documents(files):
|
|
149 |
for file in files:
|
150 |
loader = UnstructuredFileLoader(file.name)
|
151 |
documents = loader.load_documents()
|
152 |
-
|
153 |
return "Documents uploaded and processed successfully!"
|
154 |
|
155 |
def query_documents(query):
|
156 |
-
results =
|
157 |
return "\n\n".join([result.content for result in results])
|
158 |
|
159 |
with gr.Blocks() as demo:
|
|
|
10 |
from transformers import AutoTokenizer, AutoModel
|
11 |
from huggingface_hub import InferenceClient
|
12 |
from openai import OpenAI
|
13 |
+
from langchain_community.document_loaders import UnstructuredFileLoader
|
14 |
+
from langchain_chroma import Chroma
|
15 |
from chromadb import Documents, EmbeddingFunction, Embeddings
|
16 |
from chromadb.config import Settings
|
17 |
+
from chromadb import HttpClient
|
18 |
from utils import load_env_variables, parse_and_route
|
19 |
from globalvars import API_BASE, intention_prompt, tasks, system_message, model_name
|
20 |
from dotenv import load_dotenv
|
|
|
98 |
return [doc.page_content for doc in docs]
|
99 |
|
100 |
def initialize_chroma(collection_name: str, embedding_function: MyEmbeddingFunction):
|
101 |
+
client = chromadb.HttpClient(host='localhost', port=8000, settings = Settings(allow_reset=True, anonymized_telemetry=False))
|
102 |
client.reset() # resets the database
|
103 |
collection = client.create_collection(collection_name)
|
104 |
return client, collection
|
|
|
111 |
db = Chroma(client=client, collection_name=collection_name, embedding_function=embedding_function)
|
112 |
result_docs = db.similarity_search(query_text)
|
113 |
return result_docs
|
114 |
+
|
115 |
+
|
116 |
+
|
117 |
# Initialize clients
|
118 |
intention_client = OpenAI(api_key=yi_token, base_url=API_BASE)
|
119 |
embedding_generator = EmbeddingGenerator(model_name=model_name, token=hf_token, intention_client=intention_client)
|
|
|
152 |
for file in files:
|
153 |
loader = UnstructuredFileLoader(file.name)
|
154 |
documents = loader.load_documents()
|
155 |
+
add_documents_to_chroma(documents)
|
156 |
return "Documents uploaded and processed successfully!"
|
157 |
|
158 |
def query_documents(query):
|
159 |
+
results = query_chroma(query)
|
160 |
return "\n\n".join([result.content for result in results])
|
161 |
|
162 |
with gr.Blocks() as demo:
|
globalvars.py
CHANGED
@@ -87,4 +87,6 @@ produce a complete json schema."
|
|
87 |
|
88 |
you will recieve a text , classify the text according to the schema above. ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION :"""
|
89 |
|
|
|
|
|
90 |
system_message = """ You are a helpful assistant named YiTonic . answer the question provided based on the context above. Produce a complete answer:"""
|
|
|
87 |
|
88 |
you will recieve a text , classify the text according to the schema above. ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION :"""
|
89 |
|
90 |
+
metadata_prompt = "you will recieve a text or a question, produce metadata operator pairs for the text . ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION , ONLY PRODUCE ONE METADATA STRING PER OPERATOR:"
|
91 |
+
|
92 |
system_message = """ You are a helpful assistant named YiTonic . answer the question provided based on the context above. Produce a complete answer:"""
|
requirements.txt
CHANGED
@@ -7,6 +7,7 @@ openai
|
|
7 |
python-dotenv
|
8 |
chromadb
|
9 |
langchain-community
|
|
|
10 |
unstructured[all-docs]
|
11 |
libmagic
|
12 |
# poppler
|
|
|
7 |
python-dotenv
|
8 |
chromadb
|
9 |
langchain-community
|
10 |
+
langchain-chroma
|
11 |
unstructured[all-docs]
|
12 |
libmagic
|
13 |
# poppler
|