Heraali committed on
Commit 59acace
1 Parent(s): f0dd881

Upload app.py

Files changed (1)
  1. app.py +49 -10
app.py CHANGED
@@ -50,7 +50,30 @@ def create_knowledge_base_embeddings(knowledge_base):
 # Create knowledge base embeddings
 knowledge_base_embeddings = create_knowledge_base_embeddings(knowledge_base)
 
-# Function to retrieve the best context using semantic similarity
+# Create embeddings for the expanded QA dataset (for semantic matching)
+def create_expanded_qa_embeddings(expanded_qa_dataset):
+    qa_embeddings = []
+    for item in expanded_qa_dataset:
+        qa_embeddings.append({
+            "question": item["question"],
+            "answer": item["answer"],
+            "embedding": embedding_model.encode(item["question"], convert_to_tensor=True)
+        })
+    return qa_embeddings
+
+# Create expanded QA dataset embeddings
+expanded_qa_embeddings = create_expanded_qa_embeddings(expanded_qa_dataset)
+
+# Dynamic threshold adjustment based on query length
+def adjust_threshold_based_on_query_length(question_length):
+    if question_length <= 5:  # Short question, use higher threshold
+        return 0.7
+    elif 5 < question_length <= 10:  # Medium-length question
+        return 0.6
+    else:  # Longer question, use lower threshold
+        return 0.5
+
+# Function to retrieve the best context using semantic similarity (Knowledge Base)
 def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings):
     # Create embedding for the question
     question_embedding = embedding_model.encode(question, convert_to_tensor=True)
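The new create_expanded_qa_embeddings calls embedding_model.encode once per question. Below is a minimal sketch of the same precomputation done as a single batched encode call, which is typically much faster; the model name is an assumption, since this diff does not show how embedding_model is constructed earlier in app.py.

from sentence_transformers import SentenceTransformer

# Assumed setup: the diff does not show the model, so a common
# sentence-transformers checkpoint is used here as a placeholder.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

def create_expanded_qa_embeddings_batched(expanded_qa_dataset):
    # One batched encode() call returns an (N, dim) tensor; iterating
    # over its rows yields one 1-D embedding per question, matching
    # what the per-item loop in the diff produces.
    questions = [item["question"] for item in expanded_qa_dataset]
    embeddings = embedding_model.encode(questions, convert_to_tensor=True)
    return [
        {"question": item["question"], "answer": item["answer"], "embedding": emb}
        for item, emb in zip(expanded_qa_dataset, embeddings)
    ]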
@@ -62,7 +85,10 @@ def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings):
     best_match_idx = torch.argmax(cosine_scores).item()
     best_match_score = cosine_scores[0, best_match_idx].item()
 
-    if best_match_score > 0.5:  # Set a threshold for semantic similarity
+    # Adjust the threshold based on the query length
+    dynamic_threshold = adjust_threshold_based_on_query_length(len(question.split()))
+
+    if best_match_score > dynamic_threshold:  # Use dynamic threshold
         best_match_entry = knowledge_base[best_match_idx]
 
         # Check if FAQ section exists and prioritize FAQ answers
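The dynamic threshold scales inversely with query length: short questions carry fewer tokens of signal and produce spurious high-similarity matches more easily, so they must clear a stricter bar. A standalone sanity check of the word-count buckets (the function body is copied from the hunk above so the snippet runs on its own):

def adjust_threshold_based_on_query_length(question_length):
    if question_length <= 5:
        return 0.7
    elif 5 < question_length <= 10:
        return 0.6
    else:
        return 0.5

# Word counts come from question.split(), as in the calling code.
assert adjust_threshold_based_on_query_length(len("¿Dónde está la oficina?".split())) == 0.7  # 4 words
assert adjust_threshold_based_on_query_length(8) == 0.6   # medium-length question
assert adjust_threshold_based_on_query_length(15) == 0.5  # long question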
@@ -85,17 +111,30 @@ def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings):
 
     return "Lo siento, no encontré una respuesta adecuada para tu pregunta."
 
-# Check expanded QA dataset first for a direct answer
-def get_answer_from_expanded_qa(question, expanded_qa_dataset):
-    for item in expanded_qa_dataset:
-        if item['question'].lower() in question.lower():
-            return item['answer']
+# Semantic search in expanded QA dataset
+def get_answer_from_expanded_qa_semantic(question, expanded_qa_embeddings):
+    # Create embedding for the question
+    question_embedding = embedding_model.encode(question, convert_to_tensor=True)
+
+    # Calculate cosine similarity between the question and the expanded QA dataset
+    qa_cosine_scores = [util.pytorch_cos_sim(question_embedding, item["embedding"]) for item in expanded_qa_embeddings]
+
+    # Find the highest score and return the corresponding answer if similarity is high enough
+    best_match_idx = torch.argmax(torch.tensor(qa_cosine_scores)).item()
+    best_match_score = qa_cosine_scores[best_match_idx].item()
+
+    # Dynamic threshold adjustment based on query length
+    dynamic_threshold = adjust_threshold_based_on_query_length(len(question.split()))
+
+    if best_match_score > dynamic_threshold:  # Use dynamic threshold
+        return expanded_qa_embeddings[best_match_idx]["answer"]
+
     return None
 
-# Answer function for the Gradio app
+# Check expanded QA dataset first for a direct or semantic match
 def answer_question(question):
-    # Check if the question matches any entry in the expanded QA dataset
-    direct_answer = get_answer_from_expanded_qa(question, expanded_qa_dataset)
+    # Check for a semantic match in the expanded QA dataset
+    direct_answer = get_answer_from_expanded_qa_semantic(question, expanded_qa_embeddings)
     if direct_answer:
         return direct_answer
 
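One note on the added lookup: qa_cosine_scores is a Python list of 1x1 tensors, so torch.argmax(torch.tensor(qa_cosine_scores)) only works because each element happens to hold a single value. Below is a sketch of the same lookup done with one stacked similarity call instead of a per-item loop; it reuses embedding_model and adjust_threshold_based_on_query_length from the diff, and assumes each stored "embedding" is the 1-D tensor produced by encode(..., convert_to_tensor=True).

import torch
from sentence_transformers import util

def get_answer_from_expanded_qa_semantic_batched(question, expanded_qa_embeddings):
    question_embedding = embedding_model.encode(question, convert_to_tensor=True)
    # Stack the precomputed 1-D embeddings into an (N, dim) matrix and
    # score all of them in one cosine-similarity call; the result is (1, N).
    qa_matrix = torch.stack([item["embedding"] for item in expanded_qa_embeddings])
    scores = util.pytorch_cos_sim(question_embedding, qa_matrix)[0]
    best_match_idx = int(torch.argmax(scores))
    best_match_score = scores[best_match_idx].item()
    dynamic_threshold = adjust_threshold_based_on_query_length(len(question.split()))
    if best_match_score > dynamic_threshold:
        return expanded_qa_embeddings[best_match_idx]["answer"]
    return None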
140