Spaces:

Tonic
/

YiJina

Build error

App Files Community

Tonic committed on Jul 11

Commit

190f21f

•

1 Parent(s): 45cde94

add metadata prompt, fix imports, add chroma

Browse files

Files changed (3) hide show

app.py +9 -6
globalvars.py +2 -0
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -10,10 +10,11 @@ from torch import Tensor
 from transformers import AutoTokenizer, AutoModel
 from huggingface_hub import InferenceClient
 from openai import OpenAI
-from langchain_community.document_loaders import UnstructuredFileLoader
 from chromadb import Documents, EmbeddingFunction, Embeddings
 from chromadb.config import Settings
-from chromadb import HttpClient
 from utils import load_env_variables, parse_and_route
 from globalvars import API_BASE, intention_prompt, tasks, system_message, model_name
 from dotenv import load_dotenv
@@ -97,7 +98,7 @@ def load_documents(file_path: str, mode: str = "elements"):
     return [doc.page_content for doc in docs]
 def initialize_chroma(collection_name: str, embedding_function: MyEmbeddingFunction):
-    client = HttpClient(host='localhost', port=8000, settings = Settings(allow_reset=True, anonymized_telemetry=False))
     client.reset()  # resets the database
     collection = client.create_collection(collection_name)
     return client, collection
@@ -110,7 +111,9 @@ def query_chroma(client, collection_name: str, query_text: str, embedding_functi
     db = Chroma(client=client, collection_name=collection_name, embedding_function=embedding_function)
     result_docs = db.similarity_search(query_text)
     return result_docs
 # Initialize clients
 intention_client = OpenAI(api_key=yi_token, base_url=API_BASE)
 embedding_generator = EmbeddingGenerator(model_name=model_name, token=hf_token, intention_client=intention_client)
@@ -149,11 +152,11 @@ def upload_documents(files):
     for file in files:
         loader = UnstructuredFileLoader(file.name)
         documents = loader.load_documents()
-        chroma_manager.add_documents(documents)
     return "Documents uploaded and processed successfully!"
 def query_documents(query):
-    results = chroma_manager.query(query)
     return "\n\n".join([result.content for result in results])
 with gr.Blocks() as demo:

 from transformers import AutoTokenizer, AutoModel
 from huggingface_hub import InferenceClient
 from openai import OpenAI
+from langchain_community.document_loaders import UnstructuredFileLoader
+from langchain_chroma import Chroma
 from chromadb import Documents, EmbeddingFunction, Embeddings
 from chromadb.config import Settings
+from chromadb import HttpClient
 from utils import load_env_variables, parse_and_route
 from globalvars import API_BASE, intention_prompt, tasks, system_message, model_name
 from dotenv import load_dotenv
     return [doc.page_content for doc in docs]
 def initialize_chroma(collection_name: str, embedding_function: MyEmbeddingFunction):
+    client = chromadb.HttpClient(host='localhost', port=8000, settings = Settings(allow_reset=True, anonymized_telemetry=False))
     client.reset()  # resets the database
     collection = client.create_collection(collection_name)
     return client, collection
     db = Chroma(client=client, collection_name=collection_name, embedding_function=embedding_function)
     result_docs = db.similarity_search(query_text)
     return result_docs
 # Initialize clients
 intention_client = OpenAI(api_key=yi_token, base_url=API_BASE)
 embedding_generator = EmbeddingGenerator(model_name=model_name, token=hf_token, intention_client=intention_client)
     for file in files:
         loader = UnstructuredFileLoader(file.name)
         documents = loader.load_documents()
+        add_documents_to_chroma(documents)
     return "Documents uploaded and processed successfully!"
 def query_documents(query):
+    results = query_chroma(query)
     return "\n\n".join([result.content for result in results])
 with gr.Blocks() as demo:

globalvars.py CHANGED Viewed

@@ -87,4 +87,6 @@ produce a complete json schema."
 you will recieve a text , classify the text according to the schema above. ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION :"""
 system_message = """ You are a helpful assistant named YiTonic . answer the question provided based on the context above. Produce a complete answer:"""

 you will recieve a text , classify the text according to the schema above. ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION :"""
+metadata_prompt = "you will recieve a text or a question, produce metadata operator pairs for the text . ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION , ONLY PRODUCE ONE METADATA STRING PER OPERATOR:"
 system_message = """ You are a helpful assistant named YiTonic . answer the question provided based on the context above. Produce a complete answer:"""

requirements.txt CHANGED Viewed

@@ -7,6 +7,7 @@ openai
 python-dotenv
 chromadb
 langchain-community
 unstructured[all-docs]
 libmagic
 # poppler

 python-dotenv
 chromadb
 langchain-community
+langchain-chroma
 unstructured[all-docs]
 libmagic
 # poppler