Heraali committed on
Commit
822876e
1 Parent(s): c63d78d

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +9 -7
  2. requirements.txt +6 -5
app.py CHANGED
@@ -3,6 +3,7 @@ from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
3
  from sentence_transformers import SentenceTransformer, util
4
  import gradio as gr
5
  import json
 
6
 
7
  # Load pre-trained BERT QA model and tokenizer from Hugging Face model hub
8
  model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
@@ -69,7 +70,7 @@ def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embedd
69
  for content_item in best_match_entry['content']:
70
  if 'faq' in content_item:
71
  for faq in content_item['faq']:
72
- if faq['question'].lower() in question.lower():
73
  return faq['answer']
74
 
75
  # If no FAQ is found, check for steps
@@ -83,12 +84,13 @@ def get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embedd
83
  if 'text' in content_item:
84
  return content_item['text']
85
 
86
- return "Lo siento, no encontré una respuesta adecuada para tu pregunta."
87
 
88
- # Check expanded QA dataset first for a direct answer
89
- def get_answer_from_expanded_qa(question, expanded_qa_dataset):
90
  for item in expanded_qa_dataset:
91
- if item['question'].lower() == question.lower():
 
92
  return item['answer']
93
  return None
94
 
@@ -100,7 +102,7 @@ def answer_question(question):
100
  return direct_answer
101
 
102
  # If no direct answer found, use the knowledge base with semantic search
103
- context = get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings, threshold=0.55)
104
  return context
105
 
106
  # Gradio interface setup
@@ -113,4 +115,4 @@ interface = gr.Interface(
113
  )
114
 
115
  # Launch the Gradio interface
116
- interface.launch()
 
3
  from sentence_transformers import SentenceTransformer, util
4
  import gradio as gr
5
  import json
6
+ from fuzzywuzzy import fuzz
7
 
8
  # Load pre-trained BERT QA model and tokenizer from Hugging Face model hub
9
  model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
 
70
  for content_item in best_match_entry['content']:
71
  if 'faq' in content_item:
72
  for faq in content_item['faq']:
73
+ if fuzz.token_sort_ratio(faq['question'].lower(), question.lower()) > 80:
74
  return faq['answer']
75
 
76
  # If no FAQ is found, check for steps
 
84
  if 'text' in content_item:
85
  return content_item['text']
86
 
87
+ return "Lo siento, no encontré una respuesta adecuada a tu pregunta."
88
 
89
+ # Use fuzzy matching to find the closest match in the expanded QA dataset
90
+ def get_answer_from_expanded_qa(question, expanded_qa_dataset, threshold=80):
91
  for item in expanded_qa_dataset:
92
+ # Use fuzzy matching to find close matches
93
+ if fuzz.token_sort_ratio(item['question'].lower(), question.lower()) > threshold:
94
  return item['answer']
95
  return None
96
 
 
102
  return direct_answer
103
 
104
  # If no direct answer found, use the knowledge base with semantic search
105
+ context = get_dynamic_context_semantic(question, knowledge_base, knowledge_base_embeddings, threshold=0.45)
106
  return context
107
 
108
  # Gradio interface setup
 
115
  )
116
 
117
  # Launch the Gradio interface
118
+ interface.launch(share=True)
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
- gradio
2
- torch
3
- transformers
4
- datasets
5
- sentence-transformers
 
 
1
+ transformers==4.25.1
2
+ torch==1.13.1
3
+ sentence-transformers==2.2.2
4
+ fuzzywuzzy==0.18.0
5
+ scikit-learn==1.1.3
6
+ gradio==3.16.2