Spaces:
Running
Running
Upload app.py
Browse files
app.py
CHANGED
@@ -3,10 +3,6 @@ from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
|
|
3 |
from sentence_transformers import SentenceTransformer, util
|
4 |
import gradio as gr
|
5 |
import json
|
6 |
-
import logging
|
7 |
-
|
8 |
-
# Setup logging
|
9 |
-
logging.basicConfig(filename='chatbot_logs.log', level=logging.INFO)
|
10 |
|
11 |
# Load pre-trained BERT QA model and tokenizer from Hugging Face model hub
|
12 |
model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
|
@@ -54,8 +50,8 @@ def create_knowledge_base_embeddings(knowledge_base):
|
|
54 |
# Create knowledge base embeddings
|
55 |
knowledge_base_embeddings = create_knowledge_base_embeddings(knowledge_base)
|
56 |
|
57 |
-
# Function to retrieve the best context using semantic similarity
|
58 |
-
def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings, threshold=0.
|
59 |
# Create embedding for the question
|
60 |
question_embedding = embedding_model.encode(question, convert_to_tensor=True)
|
61 |
|
@@ -66,53 +62,36 @@ def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embedd
|
|
66 |
best_match_idx = torch.argmax(cosine_scores).item()
|
67 |
best_match_score = cosine_scores[0, best_match_idx].item()
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
return "\n".join(step_details)
|
90 |
-
|
91 |
-
# Fallback to regular text
|
92 |
-
for content_item in best_match_entry['content']:
|
93 |
-
if 'text' in content_item:
|
94 |
-
return content_item['text']
|
95 |
|
96 |
-
return "Lo siento, no encontré una respuesta adecuada
|
97 |
|
98 |
# Check expanded QA dataset first for a direct answer
|
99 |
def get_answer_from_expanded_qa(question, expanded_qa_dataset):
|
100 |
for item in expanded_qa_dataset:
|
101 |
if item['question'].lower() in question.lower():
|
102 |
-
logging.info(f"Direct match found in expanded QA dataset for question: {question}")
|
103 |
return item['answer']
|
104 |
return None
|
105 |
|
106 |
-
# Collect user feedback for improving the model (Placeholder for future enhancement)
def collect_user_feedback(question, user_answer, correct_answer, feedback):
    """Append one feedback record to ``user_feedback.log``.

    Parameters
    ----------
    question : str
        The question the user originally asked.
    user_answer : str
        The answer the chatbot produced.
    correct_answer : str
        The answer the user indicates is correct.
    feedback : str
        Free-form feedback text from the user.
    """
    # Placeholder: Save feedback to a file or database.
    # Build the whole record first so it lands in the file as one write.
    record = (
        f"Question: {question}\n"
        f"User Answer: {user_answer}\n"
        f"Correct Answer: {correct_answer}\n"
        f"Feedback: {feedback}\n\n"
    )
    # encoding='utf-8' keeps non-ASCII feedback (the app answers in Spanish)
    # readable regardless of the host's locale default.
    with open('user_feedback.log', 'a', encoding='utf-8') as feedback_log:
        feedback_log.write(record)
    # Lazy %-style args: the message is only formatted if INFO is enabled.
    logging.info("Feedback collected for question: %s", question)
|
115 |
-
|
116 |
# Answer function for the Gradio app
|
117 |
def answer_question(question):
|
118 |
# Check if the question matches any entry in the expanded QA dataset
|
@@ -121,24 +100,10 @@ def answer_question(question):
|
|
121 |
return direct_answer
|
122 |
|
123 |
# If no direct answer found, use the knowledge base with semantic search
|
124 |
-
context = get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings
|
125 |
return context
|
126 |
|
127 |
# Gradio interface setup
|
128 |
-
def feedback_interface(question, user_answer, correct_answer, feedback):
    """Gradio handler: persist one feedback record, then acknowledge the user."""
    acknowledgement = "Thank you for your feedback!"
    collect_user_feedback(question, user_answer, correct_answer, feedback)
    return acknowledgement
|
131 |
-
|
132 |
-
# Gradio interface setup for feedback collection
|
133 |
-
feedback_gr = gr.Interface(
|
134 |
-
fn=feedback_interface,
|
135 |
-
inputs=["text", "text", "text", "text"],
|
136 |
-
outputs="text",
|
137 |
-
title="Feedback Collection",
|
138 |
-
description="Submit feedback on the chatbot responses."
|
139 |
-
)
|
140 |
-
|
141 |
-
# Main interface
|
142 |
interface = gr.Interface(
|
143 |
fn=answer_question,
|
144 |
inputs="text",
|
@@ -149,6 +114,3 @@ interface = gr.Interface(
|
|
149 |
|
150 |
# Launch the Gradio interface
|
151 |
interface.launch()
|
152 |
-
|
153 |
-
# Launch the feedback interface separately
|
154 |
-
feedback_gr.launch(share=True)
|
|
|
3 |
from sentence_transformers import SentenceTransformer, util
|
4 |
import gradio as gr
|
5 |
import json
|
|
|
|
|
|
|
|
|
6 |
|
7 |
# Load pre-trained BERT QA model and tokenizer from Hugging Face model hub
|
8 |
model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
|
|
|
50 |
# Create knowledge base embeddings
|
51 |
knowledge_base_embeddings = create_knowledge_base_embeddings(knowledge_base)
|
52 |
|
53 |
+
# Function to retrieve the best context using semantic similarity
|
54 |
+
def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings, threshold=0.5):
|
55 |
# Create embedding for the question
|
56 |
question_embedding = embedding_model.encode(question, convert_to_tensor=True)
|
57 |
|
|
|
62 |
best_match_idx = torch.argmax(cosine_scores).item()
|
63 |
best_match_score = cosine_scores[0, best_match_idx].item()
|
64 |
|
65 |
+
if best_match_score > threshold: # Set a threshold for semantic similarity
|
66 |
+
best_match_entry = knowledge_base[best_match_idx]
|
67 |
+
|
68 |
+
# Check if FAQ section exists and prioritize FAQ answers
|
69 |
+
for content_item in best_match_entry['content']:
|
70 |
+
if 'faq' in content_item:
|
71 |
+
for faq in content_item['faq']:
|
72 |
+
if faq['question'].lower() in question.lower():
|
73 |
+
return faq['answer']
|
74 |
+
|
75 |
+
# If no FAQ is found, check for steps
|
76 |
+
for content_item in best_match_entry['content']:
|
77 |
+
if 'steps' in content_item:
|
78 |
+
step_details = [step['details'] for step in content_item['steps']]
|
79 |
+
return "\n".join(step_details)
|
80 |
+
|
81 |
+
# Fallback to regular text
|
82 |
+
for content_item in best_match_entry['content']:
|
83 |
+
if 'text' in content_item:
|
84 |
+
return content_item['text']
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
+
return "Lo siento, no encontré una respuesta adecuada para tu pregunta."
|
87 |
|
88 |
# Check expanded QA dataset first for a direct answer
def get_answer_from_expanded_qa(question, expanded_qa_dataset):
    """Look up a canned answer for *question* in the expanded QA dataset.

    An entry matches when its stored question text appears, case-insensitively,
    as a substring of the incoming question. Returns the first matching
    entry's answer, or None when nothing matches.
    """
    lowered_question = question.lower()
    hits = (
        entry['answer']
        for entry in expanded_qa_dataset
        if entry['question'].lower() in lowered_question
    )
    return next(hits, None)
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
# Answer function for the Gradio app
|
96 |
def answer_question(question):
|
97 |
# Check if the question matches any entry in the expanded QA dataset
|
|
|
100 |
return direct_answer
|
101 |
|
102 |
# If no direct answer found, use the knowledge base with semantic search
|
103 |
+
context = get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings)
|
104 |
return context
|
105 |
|
106 |
# Gradio interface setup
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
interface = gr.Interface(
|
108 |
fn=answer_question,
|
109 |
inputs="text",
|
|
|
114 |
|
115 |
# Launch the Gradio interface
|
116 |
interface.launch()
|
|
|
|
|
|