Manoj21k committed
Commit b9c3d84
1 Parent(s): c20f32a

Update app.py

Files changed (1)
  app.py +19 -54
app.py CHANGED
@@ -1,11 +1,7 @@
 import streamlit as st
 from tempfile import NamedTemporaryFile
-from langchain.document_loaders import PyPDFLoader
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import Chroma
-from langchain import PromptTemplate, LLMChain
-from langchain.llms import HuggingFaceHub
 import os
+from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
 
 # Function to save the uploaded PDF to a temporary file
 def save_uploaded_file(uploaded_file):
@@ -13,17 +9,6 @@ def save_uploaded_file(uploaded_file):
     temp_file.write(uploaded_file.read())
     return temp_file.name
 
-# Initialize the model and other resources outside the main function
-@st.cache(allow_output_mutation=True)
-def initialize_model():
-    # Initialize the HuggingFaceHub with the appropriate task
-    llm = HuggingFaceHub(
-        repo_id="deepset/roberta-base-squad2",
-        model_kwargs={"temperature": 1e-10},
-        huggingfacehub_api_token="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"
-    )
-    return llm
-
 # Streamlit UI
 st.title("PDF Question Answering App")
 uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
@@ -31,49 +16,29 @@ if uploaded_file is not None:
     # Save the uploaded file to a temporary location
     temp_file_path = save_uploaded_file(uploaded_file)
 
-    # Load the PDF document using PyPDFLoader
-    loader = PyPDFLoader(temp_file_path)
-    pages = loader.load_and_split()
-
-    # Initialize embeddings and Chroma
-    embed = HuggingFaceEmbeddings()
-    db = Chroma.from_documents(pages, embed)
-
-    # Load the model using the cached function
-    llm = initialize_model()
+    # Load the pre-trained question-answering model and tokenizer
+    model_name = "deepset/roberta-base-squad2"
+    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
 
     # Define a function to get answers
-    def get_answer(question):
-        doc = db.similarity_search(question, k=4)
-        context = doc[0].page_content + doc[1].page_content + doc[2].page_content + doc[3].page_content
-
-        max_seq_length = 512  # You may define this based on your model
-
-        context = context[:max_seq_length]
-
-        # Prompt template
-        template = """Use the following pieces of context to answer the question at the end.
-        If you don't know the answer, just say that you don't know, don't try to make up an answer.
-        Context: {context} and
-        Question: {question}
-        Answer: """
-
-        prompt = PromptTemplate(template=template, input_variables=["context", "question"]).partial(context=context)
-
-        llm_chain = LLMChain(prompt=prompt, llm=llm)
-        output = llm_chain.run(context=context, question=question, max_length=512)
-
-        answer_index = output.find("Answer:")
-        next_line_index = output.find("\n", answer_index)
-        answer_content = output[answer_index + len("Answer:")-1:next_line_index]
-
-        return answer_content
+    def get_answer(question, context):
+        inputs = tokenizer(question, context, return_tensors="pt")
+        start_scores, end_scores = model(**inputs)
+        answer_start = start_scores.argmax()
+        answer_end = end_scores.argmax() + 1
+        answer = tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end])
+        return answer
 
     question = st.text_input("Enter your question:")
+    context = st.text_area("Enter the context:")
     if st.button("Get Answer"):
-        answer = get_answer(question)
-        st.write("Answer:")
-        st.write(answer)
+        if not context:
+            st.warning("Please provide context for the question.")
+        else:
+            answer = get_answer(question, context)
+            st.write("Answer:")
+            st.write(answer)
 
     # Cleanup: Delete the temporary file
     os.remove(temp_file_path)
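
One caveat on the new get_answer: with recent versions of transformers, model(**inputs) returns a QuestionAnsweringModelOutput rather than a plain tuple, so the line start_scores, end_scores = model(**inputs) unpacks to field names instead of logit tensors and the subsequent .argmax() calls fail. A minimal sketch of a variant that reads the logits as attributes, caps the input at the model's 512-token window, and runs inference without gradients; the truncation settings and the skip_special_tokens flag are additions for illustration, not part of the commit:

import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

model_name = "deepset/roberta-base-squad2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

def get_answer(question, context):
    # Cap question + context at the model's 512-token limit so long inputs don't error out
    inputs = tokenizer(question, context, return_tensors="pt",
                       truncation=True, max_length=512)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)
    # The span logits are attributes of QuestionAnsweringModelOutput
    answer_start = outputs.start_logits.argmax()
    answer_end = outputs.end_logits.argmax() + 1
    return tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end],
                            skip_special_tokens=True)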
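
Note also that pipeline is imported in the new version but never used. The same model can be driven through the question-answering pipeline, which handles tokenization, chunking of contexts longer than the model's window, and answer decoding internally. A sketch of that alternative, with illustrative question and context strings:

from transformers import pipeline

qa = pipeline("question-answering", model="deepset/roberta-base-squad2")
result = qa(question="Who wrote the report?",
            context="The report was written by Jane Doe in 2021.")
print(result["answer"])  # illustrative example; prints "Jane Doe"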