Heraali committed on
Commit 59acace
1 Parent(s): f0dd881

Upload app.py

Files changed (1)
  1. app.py +49 -10
app.py CHANGED
@@ -50,7 +50,30 @@ def create_knowledge_base_embeddings(knowledge_base):
 # Create knowledge base embeddings
 knowledge_base_embeddings = create_knowledge_base_embeddings(knowledge_base)
 
-# Function to retrieve the best context using semantic similarity
+# Create embeddings for the expanded QA dataset (for semantic matching)
+def create_expanded_qa_embeddings(expanded_qa_dataset):
+    qa_embeddings = []
+    for item in expanded_qa_dataset:
+        qa_embeddings.append({
+            "question": item["question"],
+            "answer": item["answer"],
+            "embedding": embedding_model.encode(item["question"], convert_to_tensor=True)
+        })
+    return qa_embeddings
+
+# Create expanded QA dataset embeddings
+expanded_qa_embeddings = create_expanded_qa_embeddings(expanded_qa_dataset)
+
+# Dynamic threshold adjustment based on query length
+def adjust_threshold_based_on_query_length(question_length):
+    if question_length <= 5:  # Short question, use higher threshold
+        return 0.7
+    elif 5 < question_length <= 10:  # Medium-length question
+        return 0.6
+    else:  # Longer question, use lower threshold
+        return 0.5
+
+# Function to retrieve the best context using semantic similarity (Knowledge Base)
 def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings):
     # Create embedding for the question
     question_embedding = embedding_model.encode(question, convert_to_tensor=True)
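The new create_expanded_qa_embeddings calls embedding_model.encode once per question. Below is a minimal sketch of the same precomputation done as a single batched encode call, which is typically much faster; the model name is an assumption, since this diff does not show how embedding_model is constructed earlier in app.py.

from sentence_transformers import SentenceTransformer

# Assumed setup: the diff does not show the model, so a common
# sentence-transformers checkpoint is used here as a placeholder.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

def create_expanded_qa_embeddings_batched(expanded_qa_dataset):
    # One batched encode() call returns an (N, dim) tensor; iterating
    # over its rows yields one 1-D embedding per question, matching
    # what the per-item loop in the diff produces.
    questions = [item["question"] for item in expanded_qa_dataset]
    embeddings = embedding_model.encode(questions, convert_to_tensor=True)
    return [
        {"question": item["question"], "answer": item["answer"], "embedding": emb}
        for item, emb in zip(expanded_qa_dataset, embeddings)
    ]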
@@ -62,7 +85,10 @@ def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings):
     best_match_idx = torch.argmax(cosine_scores).item()
     best_match_score = cosine_scores[0, best_match_idx].item()
 
-    if best_match_score > 0.5:  # Set a threshold for semantic similarity
+    # Adjust the threshold based on the query length
+    dynamic_threshold = adjust_threshold_based_on_query_length(len(question.split()))
+
+    if best_match_score > dynamic_threshold:  # Use dynamic threshold
         best_match_entry = knowledge_base[best_match_idx]
 
         # Check if FAQ section exists and prioritize FAQ answers
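The dynamic threshold scales inversely with query length: short questions carry fewer tokens of signal and produce spurious high-similarity matches more easily, so they must clear a stricter bar. A standalone sanity check of the word-count buckets (the function body is copied from the hunk above so the snippet runs on its own):

def adjust_threshold_based_on_query_length(question_length):
    if question_length <= 5:
        return 0.7
    elif 5 < question_length <= 10:
        return 0.6
    else:
        return 0.5

# Word counts come from question.split(), as in the calling code.
assert adjust_threshold_based_on_query_length(len("¿Dónde está la oficina?".split())) == 0.7  # 4 words
assert adjust_threshold_based_on_query_length(8) == 0.6   # medium-length question
assert adjust_threshold_based_on_query_length(15) == 0.5  # long question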
@@ -85,17 +111,30 @@ def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings):
 
     return "Lo siento, no encontré una respuesta adecuada para tu pregunta."
 
-# Check expanded QA dataset first for a direct answer
-def get_answer_from_expanded_qa(question, expanded_qa_dataset):
-    for item in expanded_qa_dataset:
-        if item['question'].lower() in question.lower():
-            return item['answer']
+# Semantic search in expanded QA dataset
+def get_answer_from_expanded_qa_semantic(question, expanded_qa_embeddings):
+    # Create embedding for the question
+    question_embedding = embedding_model.encode(question, convert_to_tensor=True)
+
+    # Calculate cosine similarity between the question and the expanded QA dataset
+    qa_cosine_scores = [util.pytorch_cos_sim(question_embedding, item["embedding"]) for item in expanded_qa_embeddings]
+
+    # Find the highest score and return the corresponding answer if similarity is high enough
+    best_match_idx = torch.argmax(torch.tensor(qa_cosine_scores)).item()
+    best_match_score = qa_cosine_scores[best_match_idx].item()
+
+    # Dynamic threshold adjustment based on query length
+    dynamic_threshold = adjust_threshold_based_on_query_length(len(question.split()))
+
+    if best_match_score > dynamic_threshold:  # Use dynamic threshold
+        return expanded_qa_embeddings[best_match_idx]["answer"]
+
     return None
 
-# Answer function for the Gradio app
+# Check expanded QA dataset first for a direct or semantic match
 def answer_question(question):
-    # Check if the question matches any entry in the expanded QA dataset
-    direct_answer = get_answer_from_expanded_qa(question, expanded_qa_dataset)
+    # Check for a semantic match in the expanded QA dataset
+    direct_answer = get_answer_from_expanded_qa_semantic(question, expanded_qa_embeddings)
     if direct_answer:
         return direct_answer
 
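One note on the added lookup: qa_cosine_scores is a Python list of 1x1 tensors, so torch.argmax(torch.tensor(qa_cosine_scores)) only works because each element happens to hold a single value. Below is a sketch of the same lookup done with one stacked similarity call instead of a per-item loop; it reuses embedding_model and adjust_threshold_based_on_query_length from the diff, and assumes each stored "embedding" is the 1-D tensor produced by encode(..., convert_to_tensor=True).

import torch
from sentence_transformers import util

def get_answer_from_expanded_qa_semantic_batched(question, expanded_qa_embeddings):
    question_embedding = embedding_model.encode(question, convert_to_tensor=True)
    # Stack the precomputed 1-D embeddings into an (N, dim) matrix and
    # score all of them in one cosine-similarity call; the result is (1, N).
    qa_matrix = torch.stack([item["embedding"] for item in expanded_qa_embeddings])
    scores = util.pytorch_cos_sim(question_embedding, qa_matrix)[0]
    best_match_idx = int(torch.argmax(scores))
    best_match_score = scores[best_match_idx].item()
    dynamic_threshold = adjust_threshold_based_on_query_length(len(question.split()))
    if best_match_score > dynamic_threshold:
        return expanded_qa_embeddings[best_match_idx]["answer"]
    return None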
140