Upload app.py

app.py CHANGED

@@ -3,6 +3,10 @@ from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
 from sentence_transformers import SentenceTransformer, util
 import gradio as gr
 import json
+import logging
+
+# Setup logging
+logging.basicConfig(filename='chatbot_logs.log', level=logging.INFO)
 
 # Load pre-trained BERT QA model and tokenizer from Hugging Face model hub
 model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
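
A note on the logging setup added above: with only filename and level arguments, logging.basicConfig uses the root logger's default record format, %(levelname)s:%(name)s:%(message)s, which carries no timestamps. If the chatbot logs are meant to be reviewed later, a small variation (a sketch, not part of this commit) adds them:

import logging

# %(asctime)s is filled in by the logging module,
# e.g. "2024-05-01 12:00:00,123 INFO Question: ..."
logging.basicConfig(
    filename='chatbot_logs.log',
    level=logging.INFO,
    format='%(asctime)s %(levelname)s %(message)s',
)
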
@@ -50,31 +54,8 @@ def create_knowledge_base_embeddings(knowledge_base):
 # Create knowledge base embeddings
 knowledge_base_embeddings = create_knowledge_base_embeddings(knowledge_base)
 
-# Function to create embeddings for the expanded QA dataset
-def create_expanded_qa_embeddings(expanded_qa_dataset):
-    qa_embeddings = []
-    for item in expanded_qa_dataset:
-        qa_embeddings.append({
-            "question": item["question"],
-            "answer": item["answer"],
-            "embedding": embedding_model.encode(item["question"], convert_to_tensor=True)
-        })
-    return qa_embeddings
-
-# Create expanded QA dataset embeddings
-expanded_qa_embeddings = create_expanded_qa_embeddings(expanded_qa_dataset)
-
-# Dynamic threshold adjustment based on query length
-def adjust_threshold_based_on_query_length(question_length):
-    if question_length <= 5:  # Short question, use higher threshold
-        return 0.7
-    elif 5 < question_length <= 10:  # Medium-length question
-        return 0.6
-    else:  # Longer question, use lower threshold
-        return 0.5
-
-# Function to retrieve the best context using semantic similarity (Knowledge Base)
-def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings):
+# Function to retrieve the best context using semantic similarity with dynamic thresholds
+def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings, threshold=0.55):
     # Create embedding for the question
     question_embedding = embedding_model.encode(question, convert_to_tensor=True)
 
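
The hunks above call embedding_model and create_knowledge_base_embeddings without showing their definitions, which live in an unchanged part of app.py. For orientation, here is a minimal sketch consistent with how they are used; the model name and the 'title' field are assumptions, while the convert_to_tensor usage and the (1, N) score shape are implied by the diff. Also worth noting: the new comment still says "dynamic thresholds", but this change removes the dynamic adjustment function in favor of a fixed threshold=0.55 default.

from sentence_transformers import SentenceTransformer, util

# Assumed model; any sentence-transformers checkpoint fits the calls in the diff.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

def create_knowledge_base_embeddings(knowledge_base):
    # Encode one text per entry; 'title' is a guess at which field is embedded.
    texts = [entry['title'] for entry in knowledge_base]
    return embedding_model.encode(texts, convert_to_tensor=True)

# Later, util.pytorch_cos_sim(question_embedding, knowledge_base_embeddings)
# yields a (1, N) tensor, matching cosine_scores[0, best_match_idx] below.
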
@@ -85,64 +66,79 @@ def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embedd
     best_match_idx = torch.argmax(cosine_scores).item()
     best_match_score = cosine_scores[0, best_match_idx].item()
 
-    dynamic_threshold = adjust_threshold_based_on_query_length(len(question.split()))
-
-    if best_match_score > dynamic_threshold:  # Use dynamic threshold
-        best_match_entry = knowledge_base[best_match_idx]
-
-        # Check if FAQ section exists and prioritize FAQ answers
-        for content_item in best_match_entry['content']:
-            if 'faq' in content_item:
-                for faq in content_item['faq']:
-                    if faq['question'].lower() in question.lower():
-                        return faq['answer']
-
-        # If no FAQ is found, check for steps
-        for content_item in best_match_entry['content']:
-            if 'steps' in content_item:
-                step_details = [step['details'] for step in content_item['steps']]
-                return "\n".join(step_details)
-
-        # Fallback to regular text
-        for content_item in best_match_entry['content']:
-            if 'text' in content_item:
-                return content_item['text']
-
-    return "Lo siento, no encontré una respuesta adecuada para tu pregunta."
+    logging.info(f"Question: {question} - Best match score: {best_match_score}")
+
+    # Log if the similarity score is too low
+    if best_match_score < threshold:
+        logging.warning(f"Low similarity score ({best_match_score}) for question: {question}")
+        return "Lo siento, no encontré una respuesta adecuada para tu pregunta."
+
+    best_match_entry = knowledge_base[best_match_idx]
+
+    # Check if FAQ section exists and prioritize FAQ answers
+    for content_item in best_match_entry['content']:
+        if 'faq' in content_item:
+            for faq in content_item['faq']:
+                if faq['question'].lower() in question.lower():
+                    return faq['answer']
+
+    # If no FAQ is found, check for steps
+    for content_item in best_match_entry['content']:
+        if 'steps' in content_item:
+            step_details = [step['details'] for step in content_item['steps']]
+            return "\n".join(step_details)
+
+    # Fallback to regular text
+    for content_item in best_match_entry['content']:
+        if 'text' in content_item:
+            return content_item['text']
+
+    return "Lo siento, no encontré una respuesta adecuada a tu pregunta."
 
-    qa_cosine_scores = [util.pytorch_cos_sim(question_embedding, item["embedding"]) for item in expanded_qa_embeddings]
+# Check expanded QA dataset first for a direct answer
+def get_answer_from_expanded_qa(question, expanded_qa_dataset):
+    for item in expanded_qa_dataset:
+        if item['question'].lower() in question.lower():
+            logging.info(f"Direct match found in expanded QA dataset for question: {question}")
+            return item['answer']
     return None
 
+# Collect user feedback for improving the model (Placeholder for future enhancement)
+def collect_user_feedback(question, user_answer, correct_answer, feedback):
+    # Placeholder: Save feedback to a file or database
+    with open('user_feedback.log', 'a') as feedback_log:
+        feedback_log.write(f"Question: {question}\n")
+        feedback_log.write(f"User Answer: {user_answer}\n")
+        feedback_log.write(f"Correct Answer: {correct_answer}\n")
+        feedback_log.write(f"Feedback: {feedback}\n\n")
+    logging.info(f"Feedback collected for question: {question}")
+
+# Answer function for the Gradio app
 def answer_question(question):
-    # Check
-    direct_answer =
+    # Check if the question matches any entry in the expanded QA dataset
+    direct_answer = get_answer_from_expanded_qa(question, expanded_qa_dataset)
     if direct_answer:
         return direct_answer
 
     # If no direct answer found, use the knowledge base with semantic search
-    context = get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings)
+    context = get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings, threshold=0.55)
    return context
 
 # Gradio interface setup
+def feedback_interface(question, user_answer, correct_answer, feedback):
+    collect_user_feedback(question, user_answer, correct_answer, feedback)
+    return "Thank you for your feedback!"
+
+# Gradio interface setup for feedback collection
+feedback_gr = gr.Interface(
+    fn=feedback_interface,
+    inputs=["text", "text", "text", "text"],
+    outputs="text",
+    title="Feedback Collection",
+    description="Submit feedback on the chatbot responses."
+)
+
+# Main interface
 interface = gr.Interface(
     fn=answer_question,
     inputs="text",
@@ -153,3 +149,6 @@ interface = gr.Interface(
 
 # Launch the Gradio interface
 interface.launch()
+
+# Launch the feedback interface separately
+feedback_gr.launch(share=True)
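
For reference, the access patterns in get_dynamic_context_semantic imply a knowledge-base shape roughly like the sketch below. Only the keys ('content', 'faq', 'question', 'answer', 'steps', 'details', 'text') come from the code; all values are invented placeholders, and 'title' is the assumed embedded field from the earlier sketch.

knowledge_base = [
    {
        # 'title' is the assumed field embedded by create_knowledge_base_embeddings
        "title": "Creación de cuenta",
        "content": [
            {"faq": [
                {"question": "¿Cómo creo una cuenta?", "answer": "Ve a Registro y completa el formulario."}
            ]},
            {"steps": [
                {"details": "Paso 1: Abre la página de inicio."},
                {"details": "Paso 2: Haz clic en 'Crear cuenta'."}
            ]},
            {"text": "Texto general que se devuelve si no hay FAQ ni pasos."}
        ]
    }
]
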
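One caveat on get_answer_from_expanded_qa: item['question'].lower() in question.lower() is a literal substring test, so a dataset entry only fires when its exact wording appears inside the user's message. A runnable illustration with hypothetical data:

expanded_qa_dataset = [
    {"question": "restablecer mi contraseña", "answer": "Usa el enlace 'Olvidé mi contraseña'."}
]

def get_answer_from_expanded_qa(question, expanded_qa_dataset):
    # Same matching rule as the diff, minus the logging call
    for item in expanded_qa_dataset:
        if item['question'].lower() in question.lower():
            return item['answer']
    return None

print(get_answer_from_expanded_qa("¿Cómo puedo restablecer mi contraseña?", expanded_qa_dataset))
# -> "Usa el enlace 'Olvidé mi contraseña'."
print(get_answer_from_expanded_qa("No puedo entrar a mi cuenta", expanded_qa_dataset))
# -> None: rephrasings fall through to the semantic knowledge-base search
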
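Because user_feedback.log stores each record as four free-form lines, parsing it back later is brittle. A small variation (a sketch, not what this commit does) keeps the same collect_user_feedback signature but writes one JSON object per line, which json.loads can read back directly:

import json
import logging

def collect_user_feedback(question, user_answer, correct_answer, feedback):
    # One JSON object per line ("JSON Lines"); ensure_ascii=False keeps
    # Spanish text readable in the file.
    record = {
        "question": question,
        "user_answer": user_answer,
        "correct_answer": correct_answer,
        "feedback": feedback,
    }
    with open('user_feedback.jsonl', 'a', encoding='utf-8') as feedback_log:
        feedback_log.write(json.dumps(record, ensure_ascii=False) + "\n")
    logging.info(f"Feedback collected for question: {question}")
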
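A behavioral note on the two launch calls at the end of the diff: in a script, interface.launch() blocks until that server is shut down, so feedback_gr.launch(share=True) is only reached afterwards, and a Space serves a single app in any case. If both UIs should be available at once, one option (a sketch; the tab names are invented) is to serve them as tabs of one app:

import gradio as gr

# Assumes `interface` and `feedback_gr` are the two gr.Interface objects
# defined above; a single launch() then serves both as tabs.
app = gr.TabbedInterface([interface, feedback_gr], tab_names=["Chatbot", "Feedback"])
app.launch()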