petrojm committed
Commit 7801fa3
1 Parent(s): 48b6b86

changes to app and config

Files changed (2):
  1. app.py +8 -6
  2. config.yaml +1 -1
app.py CHANGED
@@ -2,7 +2,7 @@ import os
 import sys
 import yaml
 import gradio as gr
-from datetime import datetime
+import uuid
 
 current_dir = os.path.dirname(os.path.abspath(__file__))
 
@@ -25,7 +25,9 @@ def handle_userinput(user_question, conversation_chain, history):
 
             return history, ""
         except Exception as e:
-            return f"An error occurred: {str(e)}", ""
+            error_msg = f"An error occurred: {str(e)}"
+            history = history + [(user_question, error_msg)]
+            return history, ""
     else:
         return history, ""
 
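
The rewritten except branch records the failure as a chat turn instead of returning a bare error string, which would have replaced the Chatbot's entire history state. A minimal sketch of the pattern, assuming tuple-style Gradio chat history; the `answer` function and its raised error are illustrative, not from this commit:

```python
def answer(user_question, history):
    """Mimics handle_userinput's new error path (illustrative names)."""
    try:
        raise RuntimeError("model endpoint unreachable")  # stand-in for the chain call
    except Exception as e:
        error_msg = f"An error occurred: {str(e)}"
        # Append the error as a (user, bot) turn so it renders in the chat
        # instead of clobbering the history list with a plain string.
        history = history + [(user_question, error_msg)]
        return history, ""

history, cleared_box = answer("What is in my document?", [])
print(history)  # [('What is in my document?', 'An error occurred: model endpoint unreachable')]
```
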
@@ -33,10 +35,11 @@ def process_documents(files, collection_name, document_retrieval, vectorstore, conversation_chain):
     try:
         document_retrieval = DocumentRetrieval()
         _, _, text_chunks = parse_doc_universal(doc=files)
+        print(len(text_chunks))
         print(text_chunks)
         embeddings = document_retrieval.load_embedding_model()
-        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
-        collection_name = 'ekr_default_collection' + "-" + timestamp
+        collection_id = str(uuid.uuid4())
+        collection_name = f"collection_{collection_id}"
         vectorstore = document_retrieval.create_vector_store(text_chunks, embeddings, output_db=save_location, collection_name=collection_name)
         document_retrieval.init_retriever(vectorstore)
         conversation_chain = document_retrieval.get_qa_retrieval_chain()
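
Switching from a second-resolution timestamp to uuid4 makes collection names effectively collision-proof: two users processing documents within the same second previously received the identical `ekr_default_collection-<timestamp>` name and would have written into the same vector store collection. A quick sketch of the difference; the name formats mirror the diff, nothing else is assumed:

```python
import uuid
from datetime import datetime

# Old scheme: same-second uploads produce identical names.
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
assert 'ekr_default_collection' + "-" + timestamp == 'ekr_default_collection' + "-" + timestamp

# New scheme: uuid4 is random per call, so each upload gets its own collection.
name_a = f"collection_{str(uuid.uuid4())}"
name_b = f"collection_{str(uuid.uuid4())}"
assert name_a != name_b
```
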
@@ -50,7 +53,6 @@ with open(CONFIG_PATH, 'r') as yaml_file:
     config = yaml.safe_load(yaml_file)
 
 prod_mode = config.get('prod_mode', False)
-#default_collection = 'ekr_default_collection'
 
 # Load env variables
 initialize_env_variables(prod_mode)
@@ -83,7 +85,7 @@ with gr.Blocks() as demo:
     gr.Markdown(caution_text)
 
     # Preprocessing events
-    process_btn.click(process_documents, inputs=[docs, collection_name, document_retrieval, vectorstore, conversation_chain], outputs=[conversation_chain, vectorstore, document_retrieval, collection_name, setup_output], concurrency_limit=10)
+    process_btn.click(process_documents, inputs=[docs, collection_name, document_retrieval, vectorstore, conversation_chain], outputs=[conversation_chain, vectorstore, document_retrieval, collection_name, setup_output], concurrency_limit=20)
 
     # Step 3: Chat with your data
     gr.Markdown("## 3️⃣ Chat with your document")
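
Raising `concurrency_limit` from 10 to 20 doubles how many `process_documents` calls Gradio will run at once for this click event; with per-upload uuid collections, parallel runs no longer contend for one shared collection name. A stripped-down sketch of the listener wiring, assuming Gradio 4.x where event listeners accept `concurrency_limit`; the components and `process` function here are placeholders, not the app's real ones:

```python
import gradio as gr

def process(files):
    # Placeholder for the real document-processing pipeline.
    return f"Processed {len(files or [])} file(s)."

with gr.Blocks() as demo:
    docs = gr.File(file_count="multiple")
    status = gr.Textbox(label="Status")
    process_btn = gr.Button("Process")
    # Up to 20 concurrent invocations of `process` for this event;
    # additional clicks wait in the queue until a slot frees up.
    process_btn.click(process, inputs=[docs], outputs=[status], concurrency_limit=20)
```
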
 
config.yaml CHANGED
@@ -11,7 +11,7 @@ llm:
   "do_sample": False
   "max_tokens_to_generate": 1200
   "coe": True #set as true if using Sambastudio CoE endpoint
-  "select_expert": "llama3-8b" #set if using sncloud, SambaStudio CoE llm expert
+  "select_expert": "Meta-Llama-3.1-8B-Instruct" #set if using sncloud, SambaStudio CoE llm expert
   #sncloud CoE expert name -> "llama3-8b"
 
 retrieval:
 
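
The expert is now pinned to the full model id `Meta-Llama-3.1-8B-Instruct` rather than the short alias `llama3-8b`; note the trailing comment still cites the old alias. Since app.py loads this file with `yaml.safe_load`, the value would be consumed roughly as sketched below; the `config["llm"]` key path is inferred from the `llm:` section shown above, and the consuming code is not part of this commit:

```python
import yaml

with open("config.yaml", "r") as yaml_file:
    config = yaml.safe_load(yaml_file)

llm_config = config["llm"]
if llm_config.get("coe"):
    # Route CoE requests to the named expert.
    print("select_expert:", llm_config.get("select_expert"))  # Meta-Llama-3.1-8B-Instruct
```
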