Manoj21k committed on
Commit 405d7a9
1 Parent(s): b9c3d84

Update app.py

Files changed (1):
  app.py +48 -19
app.py CHANGED
@@ -1,6 +1,12 @@
 import streamlit as st
-from tempfile import NamedTemporaryFile
 import os
+from tempfile import NamedTemporaryFile
+from langchain.document_loaders import PyPDFLoader
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import Chroma
+from langchain.llms import GPT4All
+from langchain.chains.question_answering import load_qa_chain
+from langchain import PromptTemplate, LLMChain
 from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
 
 # Function to save the uploaded PDF to a temporary file
@@ -16,29 +22,52 @@ if uploaded_file is not None:
     # Save the uploaded file to a temporary location
     temp_file_path = save_uploaded_file(uploaded_file)
 
-    # Load the pre-trained question-answering model and tokenizer
-    model_name = "deepset/roberta-base-squad2"
-    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    # Load the PDF document using PyPDFLoader
+    loader = PyPDFLoader(temp_file_path)
+    pages = loader.load_and_split()
+
+    # Initialize embeddings and Chroma
+    embed = HuggingFaceEmbeddings()
+    db = Chroma.from_documents(pages, embed)
+
+    # Initialize the GPT4All model
+    llm = GPT4All(
+        model="./gpt4all-converted.bin"
+    )
 
     # Define a function to get answers
-    def get_answer(question, context):
-        inputs = tokenizer(question, context, return_tensors="pt")
-        start_scores, end_scores = model(**inputs)
-        answer_start = start_scores.argmax()
-        answer_end = end_scores.argmax() + 1
-        answer = tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end])
-        return answer
+    def get_answer(question):
+        doc = db.similarity_search(question, k=4)
+        context = doc[0].page_content + doc[1].page_content + doc[2].page_content + doc[3].page_content
+
+        max_seq_length = 512  # You may define this based on your model
+
+        context = context[:max_seq_length]
+
+        # Load the model & tokenizer for question-answering
+        model_name = "deepset/roberta-base-squad2"
+        model = AutoModelForQuestionAnswering.from_pretrained(model_name)
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+        # Create a question-answering pipeline
+        nlp = pipeline("question-answering", model=model, tokenizer=tokenizer)
+
+        # Prepare the input
+        QA_input = {
+            "question": question,
+            "context": context,
+        }
+
+        # Get the answer
+        result = nlp(**QA_input)
+
+        return result["answer"]
 
     question = st.text_input("Enter your question:")
-    context = st.text_area("Enter the context:")
     if st.button("Get Answer"):
-        if not context:
-            st.warning("Please provide context for the question.")
-        else:
-            answer = get_answer(question, context)
-            st.write("Answer:")
-            st.write(answer)
+        answer = get_answer(question)
+        st.write("Answer:")
+        st.write(answer)
 
     # Cleanup: Delete the temporary file
     os.remove(temp_file_path)
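
A note on the surrounding code: the save_uploaded_file helper called on both sides of the diff is defined outside the hunks shown, so its body does not appear here. A minimal sketch of such a helper, assuming it writes the Streamlit upload to a NamedTemporaryFile (consistent with the retained tempfile import) and returns the path; the exact body is inferred, not taken from the commit:

import streamlit as st
from tempfile import NamedTemporaryFile

def save_uploaded_file(uploaded_file):
    # Persist the upload to a temp .pdf; delete=False keeps the file on
    # disk so PyPDFLoader can reopen it later, and the app's final
    # os.remove(temp_file_path) handles cleanup.
    with NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.getvalue())
        return tmp.name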
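
One caveat in the new get_answer: context[:max_seq_length] truncates by characters, while the 512 limit for deepset/roberta-base-squad2 is measured in tokens, so most of the four retrieved chunks is dropped before the model sees them. The transformers question-answering pipeline can window a long context itself; a sketch of that variant using the pipeline's own chunking parameters (the values shown are the library defaults, used here for illustration):

# Pass the full retrieved context and let the pipeline split it into
# overlapping token windows instead of hard-truncating characters.
result = nlp(
    question=question,
    context=context,   # full concatenation of the retrieved chunks
    max_seq_len=384,   # tokens per window
    doc_stride=128,    # token overlap between consecutive windows
)
answer = result["answer"]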
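
Separately, get_answer rebuilds the model, tokenizer, and pipeline on every button press, which reloads the weights for each question. A sketch of hoisting that work out of the handler with Streamlit's resource cache (assuming a Streamlit version that provides st.cache_resource):

import streamlit as st
from transformers import pipeline

@st.cache_resource
def load_qa_pipeline():
    # Runs once per process; later reruns reuse the cached pipeline.
    return pipeline("question-answering", model="deepset/roberta-base-squad2")

nlp = load_qa_pipeline()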
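
Finally, the commit imports load_qa_chain and instantiates a GPT4All llm that get_answer never uses. Presumably they were meant to answer from the retrieved pages with the local model; a sketch of how those pieces typically connect in this LangChain API, with the chain type and call shape being assumptions rather than anything in the commit:

# Hypothetical wiring of the otherwise-unused llm and load_qa_chain:
# stuff the retrieved documents into one prompt and ask the local LLM.
chain = load_qa_chain(llm, chain_type="stuff")
docs = db.similarity_search(question, k=4)
result = chain.run(input_documents=docs, question=question)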