Heraali committed
Commit d918aeb
1 Parent(s): 22f193b

Upload app.py

Files changed (1)
  1. app.py +24 -62
app.py CHANGED
@@ -3,10 +3,6 @@ from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
 from sentence_transformers import SentenceTransformer, util
 import gradio as gr
 import json
-import logging
-
-# Setup logging
-logging.basicConfig(filename='chatbot_logs.log', level=logging.INFO)
 
 # Load pre-trained BERT QA model and tokenizer from Hugging Face model hub
 model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
@@ -54,8 +50,8 @@ def create_knowledge_base_embeddings(knowledge_base):
 # Create knowledge base embeddings
 knowledge_base_embeddings = create_knowledge_base_embeddings(knowledge_base)
 
-# Function to retrieve the best context using semantic similarity with dynamic thresholds
-def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings, threshold=0.55):
+# Function to retrieve the best context using semantic similarity
+def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings, threshold=0.5):
     # Create embedding for the question
     question_embedding = embedding_model.encode(question, convert_to_tensor=True)
 
@@ -66,53 +62,36 @@ def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embedd
     best_match_idx = torch.argmax(cosine_scores).item()
     best_match_score = cosine_scores[0, best_match_idx].item()
 
-    logging.info(f"Question: {question} - Best match score: {best_match_score}")
-
-    # Log if the similarity score is too low
-    if best_match_score < threshold:
-        logging.warning(f"Low similarity score ({best_match_score}) for question: {question}")
-        return "Lo siento, no encontré una respuesta adecuada para tu pregunta."
-
-    best_match_entry = knowledge_base[best_match_idx]
-
-    # Check if FAQ section exists and prioritize FAQ answers
-    for content_item in best_match_entry['content']:
-        if 'faq' in content_item:
-            for faq in content_item['faq']:
-                if faq['question'].lower() in question.lower():
-                    return faq['answer']
-
-    # If no FAQ is found, check for steps
-    for content_item in best_match_entry['content']:
-        if 'steps' in content_item:
-            step_details = [step['details'] for step in content_item['steps']]
-            return "\n".join(step_details)
-
-    # Fallback to regular text
-    for content_item in best_match_entry['content']:
-        if 'text' in content_item:
-            return content_item['text']
+    if best_match_score > threshold:  # Set a threshold for semantic similarity
+        best_match_entry = knowledge_base[best_match_idx]
+
+        # Check if FAQ section exists and prioritize FAQ answers
+        for content_item in best_match_entry['content']:
+            if 'faq' in content_item:
+                for faq in content_item['faq']:
+                    if faq['question'].lower() in question.lower():
+                        return faq['answer']
+
+        # If no FAQ is found, check for steps
+        for content_item in best_match_entry['content']:
+            if 'steps' in content_item:
+                step_details = [step['details'] for step in content_item['steps']]
+                return "\n".join(step_details)
+
+        # Fallback to regular text
+        for content_item in best_match_entry['content']:
+            if 'text' in content_item:
+                return content_item['text']
 
-    return "Lo siento, no encontré una respuesta adecuada a tu pregunta."
+    return "Lo siento, no encontré una respuesta adecuada para tu pregunta."
 
 # Check expanded QA dataset first for a direct answer
 def get_answer_from_expanded_qa(question, expanded_qa_dataset):
     for item in expanded_qa_dataset:
         if item['question'].lower() in question.lower():
-            logging.info(f"Direct match found in expanded QA dataset for question: {question}")
             return item['answer']
     return None
 
-# Collect user feedback for improving the model (Placeholder for future enhancement)
-def collect_user_feedback(question, user_answer, correct_answer, feedback):
-    # Placeholder: Save feedback to a file or database
-    with open('user_feedback.log', 'a') as feedback_log:
-        feedback_log.write(f"Question: {question}\n")
-        feedback_log.write(f"User Answer: {user_answer}\n")
-        feedback_log.write(f"Correct Answer: {correct_answer}\n")
-        feedback_log.write(f"Feedback: {feedback}\n\n")
-    logging.info(f"Feedback collected for question: {question}")
-
 # Answer function for the Gradio app
 def answer_question(question):
     # Check if the question matches any entry in the expanded QA dataset
@@ -121,24 +100,10 @@ def answer_question(question):
         return direct_answer
 
     # If no direct answer found, use the knowledge base with semantic search
-    context = get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings, threshold=0.55)
+    context = get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings)
    return context
 
 # Gradio interface setup
-def feedback_interface(question, user_answer, correct_answer, feedback):
-    collect_user_feedback(question, user_answer, correct_answer, feedback)
-    return "Thank you for your feedback!"
-
-# Gradio interface setup for feedback collection
-feedback_gr = gr.Interface(
-    fn=feedback_interface,
-    inputs=["text", "text", "text", "text"],
-    outputs="text",
-    title="Feedback Collection",
-    description="Submit feedback on the chatbot responses."
-)
-
-# Main interface
 interface = gr.Interface(
     fn=answer_question,
     inputs="text",
@@ -149,6 +114,3 @@ interface = gr.Interface(
 
 # Launch the Gradio interface
 interface.launch()
-
-# Launch the feedback interface separately
-feedback_gr.launch(share=True)
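
The substance of this commit is the retrieval gate: the old version logged every score and returned the fallback early when below the threshold, while the new version embeds the question, scores it against the precomputed knowledge-base embeddings, and only walks the best entry's `faq`/`steps`/`text` content when the best cosine score clears the (lowered, 0.55 to 0.5) threshold; everything else gets the single Spanish fallback message. A minimal standalone sketch of that gate, assuming a SentenceTransformer checkpoint such as `all-MiniLM-L6-v2` (the diff never shows which model `embedding_model` actually loads) and `util.cos_sim` for the similarity call, which sits outside the visible hunks:

```python
import torch
from sentence_transformers import SentenceTransformer, util

# Hypothetical checkpoint: the commit uses `embedding_model` but never shows
# which SentenceTransformer it loads.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Toy stand-in for the knowledge-base texts embedded once at startup.
documents = [
    "Para restablecer tu contraseña, abre Ajustes y elige 'Recuperar cuenta'.",
    "El horario de atención es de lunes a viernes, de 9:00 a 18:00.",
]
doc_embeddings = embedding_model.encode(documents, convert_to_tensor=True)

def best_match(question: str, threshold: float = 0.5):
    """Return (document, score) when above the threshold, else (None, score)."""
    question_embedding = embedding_model.encode(question, convert_to_tensor=True)
    cosine_scores = util.cos_sim(question_embedding, doc_embeddings)  # shape (1, N)
    best_idx = torch.argmax(cosine_scores).item()
    best_score = cosine_scores[0, best_idx].item()
    if best_score > threshold:  # same gate as get_dynamic_context_semantic
        return documents[best_idx], best_score
    return None, best_score

print(best_match("¿Cómo recupero mi contraseña?"))
```

Note that the FAQ branch matches with `faq['question'].lower() in question.lower()`, i.e. plain substring containment, so a stored FAQ question only fires when it appears verbatim inside the user's question; anything else falls through to steps, text, or the fallback string.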
 
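For context on the unchanged top of the file: `app.py` loads the SQuAD-finetuned BERT checkpoint via `AutoModelForQuestionAnswering`, yet no visible hunk ever calls it. The conventional wiring for that checkpoint, shown here as a sketch of standard `transformers` usage rather than code from this commit, is an extractive question-answering pipeline:

```python
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

# Same checkpoint the app loads at the top of app.py.
model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)

# Extractive QA: the answer is a span copied out of the supplied context.
result = qa_pipeline(
    question="Who created Python?",
    context="Python was created by Guido van Rossum and first released in 1991.",
)
print(result["answer"], round(result["score"], 3))
```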