File size: 4,659 Bytes
87b3a2f
ca6370e
87b3a2f
 
 
 
3a05b97
87b3a2f
 
 
 
 
 
 
 
3a05b97
5c44b2c
87b3a2f
 
3a05b97
 
87b3a2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca6370e
87b3a2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c44b2c
 
4ea05d0
87b3a2f
7f5f0ec
4de8423
3a05b97
5c44b2c
 
 
 
 
 
 
 
 
 
 
 
 
87b3a2f
 
5c44b2c
 
 
 
 
 
 
 
 
 
 
 
 
 
87b3a2f
5c44b2c
 
87b3a2f
5c44b2c
 
 
 
87b3a2f
 
 
5c44b2c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from pydantic import NoneStr
import os
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from pypdf import PdfReader
import mimetypes
import validators
import requests
import tempfile
import gradio as gr
import openai


def get_empty_state():
    """Return a fresh session-state dict with no knowledge base loaded.

    A new dict is created on every call, so callers never share state.
    """
    return dict(knowledge_base=None)


def create_knowledge_base(docs):
    """Build a FAISS vector store from the given documents.

    The documents are split on newlines with a configured chunk size of 500
    characters and no overlap; the resulting chunks are embedded with
    ``OpenAIEmbeddings`` and indexed with ``FAISS.from_documents``.

    Args:
        docs: Documents as produced by a LangChain document loader.

    Returns:
        The FAISS vector store built from the chunked documents.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=500,
        chunk_overlap=0,
        length_function=len,
    )
    # Arguments are evaluated left to right: split first, then create the
    # embedding client, then build the index.
    return FAISS.from_documents(splitter.split_documents(docs), OpenAIEmbeddings())


def upload_file(file_obj):
    """Index an uploaded file and return its path with the new session state.

    Args:
        file_obj: Upload handle exposing the file's path as ``.name``, or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(file_path, state)`` tuple. ``state`` is always a dict with a
        ``"knowledge_base"`` key: the vector store built from the file, or
        ``None`` when the upload was missing or could not be processed.
    """
    import logging  # function-scope import keeps this block self-contained

    # Resolve the path once, defensively, so the failure path below can never
    # raise a second time on a missing upload.
    file_path = getattr(file_obj, "name", None)
    if file_path is None:
        return None, {"knowledge_base": None}

    try:
        loader = UnstructuredFileLoader(file_path, strategy="fast")
        knowledge_base = create_knowledge_base(loader.load())
    except Exception:
        # Best-effort: an unreadable file must not crash the UI callback.
        # The state keeps its dict shape (instead of becoming a message
        # string) so downstream consumers can rely on the key being present.
        logging.getLogger(__name__).exception(
            "Could not index uploaded file %s", file_path
        )
        return file_path, {"knowledge_base": None}

    return file_path, {"knowledge_base": knowledge_base}


def upload_via_url(url):
    """Download a document from ``url``, index it, and return path and state.

    The response body is saved to a temporary file whose extension is guessed
    from the ``Content-Type`` header, then loaded and indexed.

    Args:
        url: Address of the document to fetch.

    Returns:
        A ``(file_path, state)`` tuple where ``file_path`` is the temporary
        file holding the download and ``state`` is a dict with the built
        vector store under ``"knowledge_base"``.

    Raises:
        ValueError: If ``url`` is not a valid URL, or the server answers with
            a status code other than 200.
        requests.RequestException: If the request itself fails (connection
            error, timeout, ...).
    """
    if not validators.url(url):
        raise ValueError("Please enter a valid URL")

    # Without a timeout a stalled server would block the callback forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        raise ValueError(
            f"Check the url of your file; returned status code {response.status_code}"
        )

    # Strip parameters such as "; charset=utf-8" (they prevent a match in the
    # mimetypes table) and tolerate a missing header entirely.
    content_type = (response.headers.get("content-type") or "").split(";", 1)[0].strip()
    file_extension = mimetypes.guess_extension(content_type) if content_type else None

    # delete=False: the file must outlive this block because its path is
    # returned. Leaving the ``with`` closes (and therefore flushes) the file
    # before the loader reads it back from disk.
    with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as temp_file:
        temp_file.write(response.content)
        file_path = temp_file.name

    loader = UnstructuredFileLoader(file_path, strategy="fast")
    knowledge_base = create_knowledge_base(loader.load())
    return file_path, {"knowledge_base": knowledge_base}


def answer_question(question, state):
    """Answer ``question`` from the document indexed in ``state``.

    Args:
        question: The user's question text.
        state: Session state; expected to be a dict whose
            ``"knowledge_base"`` entry is a vector store (or ``None`` when
            no document has been indexed yet).

    Returns:
        The answer produced by the QA chain, or the fixed message
        ``"Please upload Proper Document"`` when no knowledge base is
        available or answering fails.
    """
    import logging  # function-scope import keeps this block self-contained

    fallback = "Please upload Proper Document"

    # Handle "nothing indexed yet" explicitly rather than via an exception.
    # A non-dict state is treated the same way.
    knowledge_base = state.get("knowledge_base") if isinstance(state, dict) else None
    if knowledge_base is None:
        return fallback

    try:
        docs = knowledge_base.similarity_search(question)
        llm = OpenAI(temperature=0.4)
        chain = load_qa_chain(llm, chain_type="stuff")
        return chain.run(input_documents=docs, question=question)
    except Exception:
        # Best-effort UI callback: keep the user-facing fallback, but record
        # the real cause so failures are diagnosable.
        logging.getLogger(__name__).exception("Question answering failed")
        return fallback


# Gradio UI definition: a single page with a file upload control, a question
# box and an answer box. Components are registered in the order they are
# created inside the nested layout context managers below.
with gr.Blocks(css="style.css",theme=gr.themes.Soft()) as demo:
    # Session state carrying the current knowledge base; starts out empty
    # ({"knowledge_base": None}).
    state = gr.State(get_empty_state())
    # Page header: title text on the left, logo image (loaded from an
    # external URL) on the right.
    gr.HTML("""<h1 class="leftimage" align="left" style="color:#100C08;font-size:3vw;font-weight: bold;">ADOPLE AI</h1>
    <img class="rightimage" align="right" src="https://www.dmgflooringltd.co.uk/wp-content/uploads/NHS.png" alt="Image" width="180" height="180">""")
    with gr.Column(elem_id="col-container"):
        # Horizontal rule above the page title.
        gr.HTML(
            """<hr style="border-top: 5px solid white;">"""
            )
        # Centered page title.
        gr.HTML(
            """<br>
            <h1 style="text-align:center;">
               NHS Document QA
              </h1> """
        )
        # Horizontal rule below the page title.
        gr.HTML(
            """<hr style="border-top: 5px solid white;">"""
            )

        gr.Markdown("**Upload your file**")
        # Upload row: file display on the left, browse button on the right.
        with gr.Row(elem_id="row-flex"):
            # Disabled: URL-based upload textbox (its event wiring further
            # down is commented out as well).
            # with gr.Column(scale=0.85):
            #     file_url = gr.Textbox(
            #         value="",
            #         label="Upload your file",
            #         placeholder="Enter a url",
            #         show_label=False,
            #         visible=False
            #     )
            # NOTE(review): fractional ``scale`` values are used here; newer
            # Gradio releases may expect integers -- confirm against the
            # pinned Gradio version.
            with gr.Column(scale=0.90, min_width=160):
                file_output = gr.File(elem_classes="filenameshow")
            with gr.Column(scale=0.10, min_width=160):
                # Only these file extensions are offered in the file picker.
                upload_button = gr.UploadButton(
                    "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx"],
                    elem_classes="filenameshow")
        # Question input row.
        with gr.Row():
          with gr.Column(scale=1, min_width=0):
            user_question = gr.Textbox(value="",label='Question Box :',show_label=True, placeholder="Ask a question about your file:",elem_classes="spaceH")
        # Answer output row.
        with gr.Row():
          with gr.Column(scale=1, min_width=0):
            answer = gr.Textbox(value="",label='Answer Box :',show_label=True, placeholder="",lines=5)

    # Event wiring.
    #file_url.submit(upload_via_url, file_url, [file_output, state])
    # On upload: run upload_file on the uploaded file and write its two
    # return values to the file display and the session state.
    upload_button.upload(upload_file, upload_button, [file_output,state])
    # On submitting the question box: run answer_question with the question
    # and the session state, and show the result in the answer box.
    user_question.submit(answer_question, [user_question, state], [answer])

# Enable request queuing and start the app.
# NOTE(review): this runs at import time (no ``if __name__ == "__main__"``
# guard) -- confirm that is intended for how this file is deployed.
demo.queue().launch()