import json
import logging
import pickle
import random
import sys
import warnings

import gradio as gr
import nltk
import numpy as np
from autocorrect import Speller
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import load_model

# Filter noisy warnings
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

# Force stdout to UTF-8 so non-ASCII responses print cleanly (Python 3.7+);
# reconfigure() avoids reopening the file descriptor, which risks a double close
sys.stdout.reconfigure(encoding='utf-8')

# Initialize necessary components
logging.basicConfig(level=logging.INFO)
lemmatizer = WordNetLemmatizer()
spell = Speller()

# Ensure the necessary NLTK data is downloaded
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
try:
    nltk.data.find('corpora/wordnet')
except LookupError:
    nltk.download('wordnet')
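# Defensive guard (assumption, not in the original script): recent NLTK
# releases ship the Punkt tokenizer data as a separate 'punkt_tab' resource,
# so fetch it too in case the environment runs a newer NLTK
try:
    nltk.data.find('tokenizers/punkt_tab')
except LookupError:
    nltk.download('punkt_tab')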


class modelPredict:
    def __init__(self, intents_path='Newdata.json', model_path='cbv_model.h5'):
        self.intents_path = intents_path
        self.model = load_model(model_path)  # Load the trained Keras model once
        # Load the vocabulary and class labels once instead of on every prediction
        self.words = pickle.load(open('words.pkl', 'rb'))
        self.classes = pickle.load(open('classes.pkl', 'rb'))

    def clean_up_sentence(self, sentence):
        """Tokenize the sentence and lemmatize each word in lowercase."""
        sentence_words = nltk.word_tokenize(sentence)
        return [lemmatizer.lemmatize(word.lower()) for word in sentence_words]

    def bow(self, sentence, words, show_details=False):
        """Build a bag-of-words vector: 1 for each vocabulary word in the sentence."""
        sentence_words = self.clean_up_sentence(sentence)
        bag = [0] * len(words)
        for s in sentence_words:
            for i, w in enumerate(words):
                if w == s:
                    bag[i] = 1
                    if show_details:
                        print(f"Found in bag: {w}")
        return np.array(bag)
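
    # Example (hypothetical vocabulary): with words = ['hello', 'how', 'are'],
    # bow("Hello, how's it going?") lemmatizes the tokens and returns
    # array([1, 1, 0]) -- one slot per vocabulary word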

    def predict_class(self, sentence, model, error_threshold=0.25, fallback_threshold=0.60):
        """Predict the intent of a sentence, falling back when confidence is low."""
        p = self.bow(sentence, self.words, show_details=False)
        res = model.predict(np.array([p]))[0]
        results = [[i, r] for i, r in enumerate(res) if r > error_threshold]
        results.sort(key=lambda x: x[1], reverse=True)
        # Fall back when no class clears the error threshold (an empty list would
        # otherwise crash getResponse) or when the top score is not confident enough
        if not results or results[0][1] < fallback_threshold:
            top_probability = results[0][1] if results else 0.0
            return [{"intent": "fallback", "probability": str(top_probability)}]
        return [{"intent": self.classes[r[0]], "probability": str(r[1])} for r in results]
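
    # Example return value (intent names depend on what classes.pkl contains):
    #   [{'intent': 'greeting', 'probability': '0.97'}]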

    def getResponse(self, ints, intents_json):
        """Pick a random response for the top predicted intent."""
        tag = ints[0]['intent']
        for intent in intents_json['intents']:
            if intent['tag'] == tag:
                return random.choice(intent['responses'])
        # No matching tag (this also covers the synthetic "fallback" intent)
        return "I'm sorry, I don't understand. Can you please rephrase?"

    def chatbot_response(self, msg):
        """Return the chatbot's reply for a single user message."""
        with open(self.intents_path, encoding='utf-8') as f:
            intents = json.load(f)
        ints = self.predict_class(msg, self.model)
        logging.info("Predicted intents: %s", ints)
        return self.getResponse(ints, intents)


# Initialize the predictor with the intents file and trained model
predictor = modelPredict('Newdata.json', 'cbv_model.h5')

# Define the Gradio interface function: autocorrect the input, then respond
def chatbot_interface(user_input):
    corrected_sentence = spell(user_input)
    return predictor.chatbot_response(corrected_sentence)

# Create the Gradio interface
iface = gr.Interface(
    fn=chatbot_interface,
    inputs="text",
    outputs="text",
    title="Chatbot",
    description="A simple chatbot with spell correction and intent prediction.",
)

# Launch the Gradio interface
if __name__ == "__main__":
    iface.launch(share=True)
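
# Note on share=True: it asks Gradio to create a temporary public link. When
# the app is hosted on Hugging Face Spaces this is typically unnecessary,
# since the Space already serves the interface at a public URL.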