Upload app.py
app.py CHANGED
@@ -50,7 +50,30 @@ def create_knowledge_base_embeddings(knowledge_base):
 # Create knowledge base embeddings
 knowledge_base_embeddings = create_knowledge_base_embeddings(knowledge_base)
 
-#
+# Create embeddings for the expanded QA dataset (for semantic matching)
+def create_expanded_qa_embeddings(expanded_qa_dataset):
+    qa_embeddings = []
+    for item in expanded_qa_dataset:
+        qa_embeddings.append({
+            "question": item["question"],
+            "answer": item["answer"],
+            "embedding": embedding_model.encode(item["question"], convert_to_tensor=True)
+        })
+    return qa_embeddings
+
+# Create expanded QA dataset embeddings
+expanded_qa_embeddings = create_expanded_qa_embeddings(expanded_qa_dataset)
+
+# Dynamic threshold adjustment based on query length
+def adjust_threshold_based_on_query_length(question_length):
+    if question_length <= 5:  # Short question, use higher threshold
+        return 0.7
+    elif 5 < question_length <= 10:  # Medium-length question
+        return 0.6
+    else:  # Longer question, use lower threshold
+        return 0.5
+
+# Function to retrieve the best context using semantic similarity (Knowledge Base)
 def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings):
     # Create embedding for the question
     question_embedding = embedding_model.encode(question, convert_to_tensor=True)
@@ -62,7 +85,10 @@ def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings):
     best_match_idx = torch.argmax(cosine_scores).item()
     best_match_score = cosine_scores[0, best_match_idx].item()
 
-
+    # Adjust the threshold based on the query length
+    dynamic_threshold = adjust_threshold_based_on_query_length(len(question.split()))
+
+    if best_match_score > dynamic_threshold:  # Use dynamic threshold
         best_match_entry = knowledge_base[best_match_idx]
 
         # Check if FAQ section exists and prioritize FAQ answers
@@ -85,17 +111,30 @@ def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings):
 
     return "Lo siento, no encontré una respuesta adecuada para tu pregunta."
 
-#
-def
-    for
-
-
+# Semantic search in expanded QA dataset
+def get_answer_from_expanded_qa_semantic(question, expanded_qa_embeddings):
+    # Create embedding for the question
+    question_embedding = embedding_model.encode(question, convert_to_tensor=True)
+
+    # Calculate cosine similarity between the question and the expanded QA dataset
+    qa_cosine_scores = [util.pytorch_cos_sim(question_embedding, item["embedding"]) for item in expanded_qa_embeddings]
+
+    # Find the highest score and return the corresponding answer if similarity is high enough
+    best_match_idx = torch.argmax(torch.tensor(qa_cosine_scores)).item()
+    best_match_score = qa_cosine_scores[best_match_idx].item()
+
+    # Dynamic threshold adjustment based on query length
+    dynamic_threshold = adjust_threshold_based_on_query_length(len(question.split()))
+
+    if best_match_score > dynamic_threshold:  # Use dynamic threshold
+        return expanded_qa_embeddings[best_match_idx]["answer"]
+
     return None
 
-#
+# Check expanded QA dataset first for a direct or semantic match
 def answer_question(question):
-    # Check
-    direct_answer =
+    # Check for a semantic match in the expanded QA dataset
+    direct_answer = get_answer_from_expanded_qa_semantic(question, expanded_qa_embeddings)
     if direct_answer:
         return direct_answer
 
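For anyone who wants to try the new matching path outside the Space, here is a minimal, self-contained sketch of what this commit adds: precomputed question embeddings plus a length-adaptive similarity threshold. The model name, the sample QA pairs, and the get_answer wrapper are placeholders for illustration only; in app.py the model lives in embedding_model, whose construction is outside this diff.

import torch
from sentence_transformers import SentenceTransformer, util

# Assumed model; app.py defines embedding_model elsewhere, not shown in this diff.
embedding_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

# Hypothetical sample data standing in for the Space's expanded_qa_dataset.
expanded_qa_dataset = [
    {"question": "¿Cuál es el horario de atención?", "answer": "Atendemos de 9:00 a 18:00."},
    {"question": "¿Dónde están ubicados?", "answer": "Estamos en el centro de la ciudad."},
]

def adjust_threshold_based_on_query_length(question_length):
    # Shorter queries carry less context, so a stricter threshold guards
    # against spurious high-similarity matches.
    if question_length <= 5:
        return 0.7
    elif question_length <= 10:
        return 0.6
    return 0.5

# Precompute one embedding per stored question, mirroring create_expanded_qa_embeddings.
qa_embeddings = [
    {**item, "embedding": embedding_model.encode(item["question"], convert_to_tensor=True)}
    for item in expanded_qa_dataset
]

def get_answer(question):
    # Embed the incoming question.
    q_emb = embedding_model.encode(question, convert_to_tensor=True)
    # Stack stored embeddings so a single cos_sim call scores every candidate.
    corpus = torch.stack([e["embedding"] for e in qa_embeddings])
    scores = util.pytorch_cos_sim(q_emb, corpus)[0]
    best_idx = int(torch.argmax(scores))
    # Pick the threshold from the word count of the query, as in the diff.
    threshold = adjust_threshold_based_on_query_length(len(question.split()))
    return qa_embeddings[best_idx]["answer"] if scores[best_idx] > threshold else None

print(get_answer("¿A qué hora abren?"))  # expected to match the schedule entry

One deliberate difference in this sketch: it stacks the stored embeddings and scores them in a single pytorch_cos_sim call, which should be equivalent to the diff's per-item list comprehension but avoids building a Python list of 1x1 tensors.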