Heraali committed
Commit d918aeb
1 Parent(s): 22f193b

Upload app.py

Files changed (1)
  1. app.py +24 -62
app.py CHANGED
@@ -3,10 +3,6 @@ from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
 from sentence_transformers import SentenceTransformer, util
 import gradio as gr
 import json
-import logging
-
-# Setup logging
-logging.basicConfig(filename='chatbot_logs.log', level=logging.INFO)
 
 # Load pre-trained BERT QA model and tokenizer from Hugging Face model hub
 model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
@@ -54,8 +50,8 @@ def create_knowledge_base_embeddings(knowledge_base):
 # Create knowledge base embeddings
 knowledge_base_embeddings = create_knowledge_base_embeddings(knowledge_base)
 
-# Function to retrieve the best context using semantic similarity with dynamic thresholds
-def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings, threshold=0.55):
+# Function to retrieve the best context using semantic similarity
+def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings, threshold=0.5):
     # Create embedding for the question
     question_embedding = embedding_model.encode(question, convert_to_tensor=True)
 
@@ -66,53 +62,36 @@ def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embedd
     best_match_idx = torch.argmax(cosine_scores).item()
     best_match_score = cosine_scores[0, best_match_idx].item()
 
-    logging.info(f"Question: {question} - Best match score: {best_match_score}")
-
-    # Log if the similarity score is too low
-    if best_match_score < threshold:
-        logging.warning(f"Low similarity score ({best_match_score}) for question: {question}")
-        return "Lo siento, no encontré una respuesta adecuada para tu pregunta."
-
-    best_match_entry = knowledge_base[best_match_idx]
-
-    # Check if FAQ section exists and prioritize FAQ answers
-    for content_item in best_match_entry['content']:
-        if 'faq' in content_item:
-            for faq in content_item['faq']:
-                if faq['question'].lower() in question.lower():
-                    return faq['answer']
-
-    # If no FAQ is found, check for steps
-    for content_item in best_match_entry['content']:
-        if 'steps' in content_item:
-            step_details = [step['details'] for step in content_item['steps']]
-            return "\n".join(step_details)
-
-    # Fallback to regular text
-    for content_item in best_match_entry['content']:
-        if 'text' in content_item:
-            return content_item['text']
+    if best_match_score > threshold:  # Set a threshold for semantic similarity
+        best_match_entry = knowledge_base[best_match_idx]
+
+        # Check if FAQ section exists and prioritize FAQ answers
+        for content_item in best_match_entry['content']:
+            if 'faq' in content_item:
+                for faq in content_item['faq']:
+                    if faq['question'].lower() in question.lower():
+                        return faq['answer']
+
+        # If no FAQ is found, check for steps
+        for content_item in best_match_entry['content']:
+            if 'steps' in content_item:
+                step_details = [step['details'] for step in content_item['steps']]
+                return "\n".join(step_details)
+
+        # Fallback to regular text
+        for content_item in best_match_entry['content']:
+            if 'text' in content_item:
+                return content_item['text']
 
-    return "Lo siento, no encontré una respuesta adecuada a tu pregunta."
+    return "Lo siento, no encontré una respuesta adecuada para tu pregunta."
 
 # Check expanded QA dataset first for a direct answer
 def get_answer_from_expanded_qa(question, expanded_qa_dataset):
     for item in expanded_qa_dataset:
         if item['question'].lower() in question.lower():
-            logging.info(f"Direct match found in expanded QA dataset for question: {question}")
             return item['answer']
     return None
 
-# Collect user feedback for improving the model (Placeholder for future enhancement)
-def collect_user_feedback(question, user_answer, correct_answer, feedback):
-    # Placeholder: Save feedback to a file or database
-    with open('user_feedback.log', 'a') as feedback_log:
-        feedback_log.write(f"Question: {question}\n")
-        feedback_log.write(f"User Answer: {user_answer}\n")
-        feedback_log.write(f"Correct Answer: {correct_answer}\n")
-        feedback_log.write(f"Feedback: {feedback}\n\n")
-    logging.info(f"Feedback collected for question: {question}")
-
 # Answer function for the Gradio app
 def answer_question(question):
     # Check if the question matches any entry in the expanded QA dataset
@@ -121,24 +100,10 @@ def answer_question(question):
         return direct_answer
 
     # If no direct answer found, use the knowledge base with semantic search
-    context = get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings, threshold=0.55)
+    context = get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings)
    return context
 
 # Gradio interface setup
-def feedback_interface(question, user_answer, correct_answer, feedback):
-    collect_user_feedback(question, user_answer, correct_answer, feedback)
-    return "Thank you for your feedback!"
-
-# Gradio interface setup for feedback collection
-feedback_gr = gr.Interface(
-    fn=feedback_interface,
-    inputs=["text", "text", "text", "text"],
-    outputs="text",
-    title="Feedback Collection",
-    description="Submit feedback on the chatbot responses."
-)
-
-# Main interface
 interface = gr.Interface(
     fn=answer_question,
     inputs="text",
@@ -149,6 +114,3 @@ interface = gr.Interface(
 
 # Launch the Gradio interface
 interface.launch()
-
-# Launch the feedback interface separately
-feedback_gr.launch(share=True)
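
The substance of this commit is the retrieval gate: the old version logged every score and returned the fallback early when below the threshold, while the new version embeds the question, scores it against the precomputed knowledge-base embeddings, and only walks the best entry's `faq`/`steps`/`text` content when the best cosine score clears the (lowered, 0.55 to 0.5) threshold; everything else gets the single Spanish fallback message. A minimal standalone sketch of that gate, assuming a SentenceTransformer checkpoint such as `all-MiniLM-L6-v2` (the diff never shows which model `embedding_model` actually loads) and `util.cos_sim` for the similarity call, which sits outside the visible hunks:

```python
import torch
from sentence_transformers import SentenceTransformer, util

# Hypothetical checkpoint: the commit uses `embedding_model` but never shows
# which SentenceTransformer it loads.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Toy stand-in for the knowledge-base texts embedded once at startup.
documents = [
    "Para restablecer tu contraseña, abre Ajustes y elige 'Recuperar cuenta'.",
    "El horario de atención es de lunes a viernes, de 9:00 a 18:00.",
]
doc_embeddings = embedding_model.encode(documents, convert_to_tensor=True)

def best_match(question: str, threshold: float = 0.5):
    """Return (document, score) when above the threshold, else (None, score)."""
    question_embedding = embedding_model.encode(question, convert_to_tensor=True)
    cosine_scores = util.cos_sim(question_embedding, doc_embeddings)  # shape (1, N)
    best_idx = torch.argmax(cosine_scores).item()
    best_score = cosine_scores[0, best_idx].item()
    if best_score > threshold:  # same gate as get_dynamic_context_semantic
        return documents[best_idx], best_score
    return None, best_score

print(best_match("¿Cómo recupero mi contraseña?"))
```

Note that the FAQ branch matches with `faq['question'].lower() in question.lower()`, i.e. plain substring containment, so a stored FAQ question only fires when it appears verbatim inside the user's question; anything else falls through to steps, text, or the fallback string.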
 
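For context on the unchanged top of the file: `app.py` loads the SQuAD-finetuned BERT checkpoint via `AutoModelForQuestionAnswering`, yet no visible hunk ever calls it. The conventional wiring for that checkpoint, shown here as a sketch of standard `transformers` usage rather than code from this commit, is an extractive question-answering pipeline:

```python
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

# Same checkpoint the app loads at the top of app.py.
model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)

# Extractive QA: the answer is a span copied out of the supplied context.
result = qa_pipeline(
    question="Who created Python?",
    context="Python was created by Guido van Rossum and first released in 1991.",
)
print(result["answer"], round(result["score"], 3))
```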