vivekvar committed on
Commit
4098cd5
1 Parent(s): 3cb2c45

Update app.py

Files changed (1)
  1. app.py +102 -102
app.py CHANGED
@@ -1,102 +1,102 @@
import streamlit as st
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from dotenv import load_dotenv
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
import os
import base64

# Load environment variables
load_dotenv()

# Configure the LlamaIndex settings
Settings.llm = HuggingFaceInferenceAPI(
    model_name="google/gemma-1.1-7b-it",
    tokenizer_name="google/gemma-1.1-7b-it",
    context_window=3000,
    token=os.getenv("HF_TOKEN"),
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# Define the directories for persistent storage and data
PERSIST_DIR = "./db"
DATA_DIR = "data"

# Ensure the directories exist
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)

def displayPDF(file):
    # Embed the PDF in the page as a base64-encoded iframe
    with open(file, "rb") as f:
        base64_pdf = base64.b64encode(f.read()).decode('utf-8')
    pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
    st.markdown(pdf_display, unsafe_allow_html=True)

def data_ingestion():
    # Load documents from the data directory, build a vector index, and persist it
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

def handle_query(query):
    # Reload the persisted index and answer the query with a custom QA prompt
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
    chat_text_qa_msgs = [
        (
            "user",
            """Created by vivek for Neonflake Enterprises OPC Pvt Ltd.
            Context:
            {context_str}
            Question:
            {query_str}
            """
        )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)

    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)

    if hasattr(answer, 'response'):
        return answer.response
    elif isinstance(answer, dict) and 'response' in answer:
        return answer['response']
    else:
        return "Sorry, I couldn't find an answer."


# Streamlit app initialization
st.title("Chat with your PDF📄")
st.markdown("Built by [vivek](https://github.com/saravivek-cyber)")
st.markdown("Chat here")

if 'messages' not in st.session_state:
    st.session_state.messages = [{'role': 'assistant', 'content': 'Hello! Upload a PDF and ask me anything about its content.'}]

with st.sidebar:
    st.title("Menu:")
    uploaded_file = st.file_uploader("Upload your PDF file and click the Submit & Process button")
    if st.button("Submit & Process"):
        if uploaded_file is None:
            st.warning("Please upload a PDF file first.")
        else:
            with st.spinner("Processing..."):
                filepath = os.path.join(DATA_DIR, "saved_pdf.pdf")
                with open(filepath, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                # displayPDF(filepath)  # Display the uploaded PDF
                data_ingestion()  # Re-index every time a new file is uploaded
                st.success("Done")

user_prompt = st.chat_input("Ask me anything about the content of the PDF:")
if user_prompt:
    st.session_state.messages.append({'role': 'user', 'content': user_prompt})
    response = handle_query(user_prompt)
    st.session_state.messages.append({'role': 'assistant', 'content': response})

for message in st.session_state.messages:
    with st.chat_message(message['role']):
        st.write(message['content'])