Create app.py

app.py (ADDED)
import os
from tempfile import NamedTemporaryFile

import streamlit as st
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFaceHub

# Function to save the uploaded PDF to a temporary file
def save_uploaded_file(uploaded_file):
    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(uploaded_file.read())
        return temp_file.name

# Streamlit UI
st.title("PDF Question Answering App")
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    # Save the uploaded file to a temporary location
    temp_file_path = save_uploaded_file(uploaded_file)

    # Load the PDF document using PyPDFLoader and split it into pages
    loader = PyPDFLoader(temp_file_path)
    pages = loader.load_and_split()

    # Initialize embeddings and build a Chroma vector store from the pages
    embed = HuggingFaceEmbeddings()
    db = Chroma.from_documents(pages, embed)

    # HuggingFaceHub reads the HUGGINGFACEHUB_API_TOKEN environment variable;
    # set it as a Space secret rather than hard-coding the key in source.
    if "HUGGINGFACEHUB_API_TOKEN" not in os.environ:
        st.error("Set the HUGGINGFACEHUB_API_TOKEN environment variable (e.g. as a Space secret).")
        st.stop()

    # Initialize the HuggingFaceHub LLM (max_length belongs in model_kwargs
    # with the other generation parameters, not in the chain call)
    llm = HuggingFaceHub(
        repo_id="Manoj21k/GPT4ALL",
        model_kwargs={"temperature": 1e-10, "max_length": 512},
    )

    # Define a function to get answers
    def get_answer(question):
        # Retrieve the four most similar chunks and concatenate their text
        docs = db.similarity_search(question, k=4)
        context = "".join(doc.page_content for doc in docs)

        # Truncate the context to the model's maximum sequence length
        max_seq_length = 512  # adjust to your model's context window
        context = context[:max_seq_length]

        # Prompt template
        template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Context: {context}
Question: {question}
Answer: """

        # Bind the retrieved context now; the chain then only needs the question
        prompt = PromptTemplate(
            template=template, input_variables=["context", "question"]
        ).partial(context=context)

        llm_chain = LLMChain(prompt=prompt, llm=llm)
        output = llm_chain.run(question=question)

        # Extract the text that follows "Answer:" in the model output,
        # falling back to the raw output if the marker is absent
        answer_index = output.find("Answer:")
        if answer_index == -1:
            return output.strip()
        next_line_index = output.find("\n", answer_index)
        if next_line_index == -1:
            next_line_index = len(output)
        return output[answer_index + len("Answer:"):next_line_index].strip()

    question = st.text_input("Enter your question:")
    if st.button("Get Answer"):
        answer = get_answer(question)
        st.write("Answer:")
        st.write(answer)

    # Cleanup: delete the temporary file
    os.remove(temp_file_path)
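A note on running this Space: the import paths above (langchain.document_loaders, langchain.llms, and the top-level PromptTemplate/LLMChain) come from the pre-0.1 LangChain API, so a requirements.txt along these lines is assumed; the package list is inferred from the imports and the pins are illustrative, not taken from the original repo.

    # requirements.txt (sketch; pins are assumptions)
    streamlit
    langchain<0.1
    pypdf                  # backend for PyPDFLoader
    chromadb               # backend for the Chroma vector store
    sentence-transformers  # default model for HuggingFaceEmbeddings
    huggingface-hub        # client used by HuggingFaceHub

To try it locally, export the token and start the app:

    export HUGGINGFACEHUB_API_TOKEN=<your-token>
    streamlit run app.py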