Spaces:
Paused
Paused
jeevan
committed on
Commit
•
bc453aa
0
Parent(s):
recommit
Browse files- .chainlit/config.toml +84 -0
- .gitignore +8 -0
- Dockerfile +11 -0
- app.py +271 -0
- chainlit.md +3 -0
- embedding_model.py +58 -0
- pdfloader.py +27 -0
- pre-processing.ipynb +595 -0
- requirements.txt +15 -0
.chainlit/config.toml
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
# Whether to enable telemetry (default: true). No personal data is collected.
|
3 |
+
enable_telemetry = true
|
4 |
+
|
5 |
+
# List of environment variables to be provided by each user to use the app.
|
6 |
+
user_env = []
|
7 |
+
|
8 |
+
# Duration (in seconds) during which the session is saved when the connection is lost
|
9 |
+
session_timeout = 3600
|
10 |
+
|
11 |
+
# Enable third parties caching (e.g LangChain cache)
|
12 |
+
cache = false
|
13 |
+
|
14 |
+
# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
|
15 |
+
# follow_symlink = false
|
16 |
+
|
17 |
+
[features]
|
18 |
+
# Show the prompt playground
|
19 |
+
prompt_playground = true
|
20 |
+
|
21 |
+
# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
|
22 |
+
unsafe_allow_html = false
|
23 |
+
|
24 |
+
# Process and display mathematical expressions. This can clash with "$" characters in messages.
|
25 |
+
latex = false
|
26 |
+
|
27 |
+
# Authorize users to upload files with messages
|
28 |
+
multi_modal = true
|
29 |
+
|
30 |
+
# Allows user to use speech to text
|
31 |
+
[features.speech_to_text]
|
32 |
+
enabled = false
|
33 |
+
# See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
|
34 |
+
# language = "en-US"
|
35 |
+
|
36 |
+
[UI]
|
37 |
+
# Name of the app and chatbot.
|
38 |
+
name = "Chatbot"
|
39 |
+
|
40 |
+
# Show the readme while the conversation is empty.
|
41 |
+
show_readme_as_default = true
|
42 |
+
|
43 |
+
# Description of the app and chatbot. This is used for HTML tags.
|
44 |
+
# description = ""
|
45 |
+
|
46 |
+
# Large size content are by default collapsed for a cleaner ui
|
47 |
+
default_collapse_content = true
|
48 |
+
|
49 |
+
# The default value for the expand messages settings.
|
50 |
+
default_expand_messages = false
|
51 |
+
|
52 |
+
# Hide the chain of thought details from the user in the UI.
|
53 |
+
hide_cot = false
|
54 |
+
|
55 |
+
# Link to your github repo. This will add a github button in the UI's header.
|
56 |
+
# github = ""
|
57 |
+
|
58 |
+
# Specify a CSS file that can be used to customize the user interface.
|
59 |
+
# The CSS file can be served from the public directory or via an external link.
|
60 |
+
# custom_css = "/public/test.css"
|
61 |
+
|
62 |
+
# Override default MUI light theme. (Check theme.ts)
|
63 |
+
[UI.theme.light]
|
64 |
+
#background = "#FAFAFA"
|
65 |
+
#paper = "#FFFFFF"
|
66 |
+
|
67 |
+
[UI.theme.light.primary]
|
68 |
+
#main = "#F80061"
|
69 |
+
#dark = "#980039"
|
70 |
+
#light = "#FFE7EB"
|
71 |
+
|
72 |
+
# Override default MUI dark theme. (Check theme.ts)
|
73 |
+
[UI.theme.dark]
|
74 |
+
#background = "#FAFAFA"
|
75 |
+
#paper = "#FFFFFF"
|
76 |
+
|
77 |
+
[UI.theme.dark.primary]
|
78 |
+
#main = "#F80061"
|
79 |
+
#dark = "#980039"
|
80 |
+
#light = "#FFE7EB"
|
81 |
+
|
82 |
+
|
83 |
+
[meta]
|
84 |
+
generated_by = "0.7.700"
|
.gitignore
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/venv
|
2 |
+
__pycache__/*
|
3 |
+
.env
|
4 |
+
download-hf-model.ipynb
|
5 |
+
temp
|
6 |
+
start_qdrant_services.sh
|
7 |
+
requirements copy.txt
|
8 |
+
Dockerfile copy
|
Dockerfile
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.11.9
|
2 |
+
RUN useradd -m -u 1000 user
|
3 |
+
USER user
|
4 |
+
ENV HOME=/home/user \
|
5 |
+
PATH=/home/user/.local/bin:$PATH
|
6 |
+
WORKDIR $HOME/app
|
7 |
+
COPY --chown=user . $HOME/app
|
8 |
+
COPY ./requirements.txt ~/app/requirements.txt
|
9 |
+
RUN pip install -r requirements.txt
|
10 |
+
COPY . .
|
11 |
+
CMD ["chainlit", "run", "app.py", "--port", "7860"]
|
app.py
ADDED
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import List
|
3 |
+
import uuid
|
4 |
+
import chainlit as cl
|
5 |
+
from chainlit.types import AskFileResponse
|
6 |
+
from langchain.memory import ConversationBufferMemory
|
7 |
+
from langchain_core.chat_history import BaseChatMessageHistory
|
8 |
+
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
|
9 |
+
from langchain.prompts import MessagesPlaceholder
|
10 |
+
from langchain.prompts import ChatPromptTemplate
|
11 |
+
from langchain_community.chat_message_histories import ChatMessageHistory
|
12 |
+
from langchain.chains.history_aware_retriever import create_history_aware_retriever
|
13 |
+
from langchain.chains.retrieval import create_retrieval_chain
|
14 |
+
from langchain.chains.combine_documents import create_stuff_documents_chain
|
15 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
16 |
+
from langchain_experimental.text_splitter import SemanticChunker
|
17 |
+
from langchain_qdrant import QdrantVectorStore
|
18 |
+
from langchain_core.documents import Document
|
19 |
+
from qdrant_client import QdrantClient
|
20 |
+
from qdrant_client.http.models import Distance, VectorParams
|
21 |
+
from langchain_openai import ChatOpenAI
|
22 |
+
from embedding_model import get_embeddings_openai_text_3_large,get_embeddings_snowflake_arctic_embed_l
|
23 |
+
from pdfloader import PDFLoaderWrapper
|
24 |
+
from langchain_core.runnables.history import RunnableWithMessageHistory
|
25 |
+
from chainlit.input_widget import Select, Switch, Slider
|
26 |
+
from dotenv import load_dotenv
|
27 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
28 |
+
|
29 |
+
load_dotenv()
|
30 |
+
|
31 |
+
BOR_FILE_PATH = "https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf"
|
32 |
+
NIST_FILE_PATH = "https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf"
|
33 |
+
SMALL_DOC = "https://arxiv.org/pdf/1908.10084" # 11 pages Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks
|
34 |
+
documents_to_preload = [
|
35 |
+
BOR_FILE_PATH,
|
36 |
+
NIST_FILE_PATH
|
37 |
+
# SMALL_DOC
|
38 |
+
]
|
39 |
+
collection_name = "ai-safety"
|
40 |
+
|
41 |
+
welcome_message = """
|
42 |
+
Welcome to the chatbot to clarify all your AI Safety related queries.:
|
43 |
+
Now preloading below documents:
|
44 |
+
1. Blueprint for an AI Bill of Rights
|
45 |
+
2. NIST AI Standards
|
46 |
+
Please wait for a moment to load the documents.
|
47 |
+
"""
|
48 |
+
chat_model_name = "gpt-4o"
|
49 |
+
embedding_model_name = "Snowflake/snowflake-arctic-embed-l"
|
50 |
+
chat_model = ChatOpenAI(model=chat_model_name, temperature=0)
|
51 |
+
|
52 |
+
async def connect_to_qdrant():
    """Build a retriever over an existing remote Qdrant collection.

    Reads ``QDRANT_URL``, ``QDRANT_API_KEY`` and ``COLLECTION_NAME`` from
    the environment (raises ``KeyError`` when any is missing) and embeds
    queries with the module-level HuggingFace embedding model.
    """
    url = os.environ["QDRANT_URL"]
    api_key = os.environ["QDRANT_API_KEY"]
    target_collection = os.environ["COLLECTION_NAME"]

    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
    remote_client = QdrantClient(url=url, api_key=api_key)

    store = QdrantVectorStore(
        client=remote_client,
        collection_name=target_collection,
        embedding=embeddings,
    )
    return store.as_retriever()
64 |
+
|
65 |
+
def initialize_vectorstore(
    collection_name: str,
    embedding_model,
    dimension,
    distance_metric: Distance = Distance.COSINE,
):
    """Create a fresh in-memory Qdrant collection and return a store over it.

    Args:
        collection_name: Name of the collection to create.
        embedding_model: Embedding model used to vectorise documents.
        dimension: Size of the embedding vectors.
        distance_metric: Similarity metric for the collection (cosine by default).
    """
    in_mem_client = QdrantClient(":memory:")
    in_mem_client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=dimension, distance=distance_metric),
    )
    return QdrantVectorStore(
        client=in_mem_client,
        collection_name=collection_name,
        embedding=embedding_model,
    )
83 |
+
|
84 |
+
def get_text_splitter(strategy, embedding_model):
    """Return a text splitter for the requested chunking strategy.

    Args:
        strategy: ``"semantic"`` for embedding-based semantic chunking, or
            ``"recursive"`` for plain recursive character splitting
            (``embedding_model`` is ignored for the latter).
        embedding_model: Embedding model used by the semantic chunker.

    Raises:
        ValueError: For an unrecognised strategy. (Previously the function
            fell off the end and returned ``None``, which surfaced later as
            an AttributeError at the call site.)
    """
    if strategy == "semantic":
        return SemanticChunker(
            embedding_model,
            breakpoint_threshold_type="percentile",
            breakpoint_threshold_amount=90,
        )
    if strategy == "recursive":
        # RecursiveCharacterTextSplitter is already imported at module level.
        return RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    raise ValueError(f"Unknown text-splitting strategy: {strategy!r}")
91 |
+
|
92 |
+
def process_file(file: AskFileResponse, text_splitter):
    """Load an uploaded file, split it into chunks and tag chunk metadata.

    Args:
        file: Chainlit upload descriptor; only ``text/plain`` and
            ``application/pdf`` are supported.
        text_splitter: Any LangChain splitter exposing ``split_documents``.

    Returns:
        The split documents, each with ``source`` (``source_<i>``) and the
        first page's ``title`` copied into its metadata.

    Raises:
        ValueError: For an unsupported MIME type. (Previously ``Loader``
            was left unbound and the function crashed with
            UnboundLocalError.)
    """
    if file.type == "text/plain":
        Loader = TextLoader
    elif file.type == "application/pdf":
        Loader = PyMuPDFLoader
    else:
        raise ValueError(f"Unsupported file type: {file.type}")

    loader = Loader(file.path)
    documents = loader.load()
    # Title comes from the first page's metadata; may be None if absent.
    title = documents[0].metadata.get("title")
    docs = text_splitter.split_documents(documents)
    for i, doc in enumerate(docs):
        doc.metadata["source"] = f"source_{i}"
        doc.metadata["title"] = title
    return docs
106 |
+
|
107 |
+
def populate_vectorstore(vector_store, docs: List[Document]):
    """Index *docs* into *vector_store* and hand the store back to the caller."""
    # add_documents embeds and upserts every chunk in a single call.
    vector_store.add_documents(docs)
    return vector_store
110 |
+
|
111 |
+
def create_history_aware_retriever_self(chat_model, retriever):
    """Wrap *retriever* so follow-up questions are first rewritten standalone.

    The chat model reformulates the latest user turn using the chat history
    before the retriever is queried; it is instructed not to answer.
    """
    rewrite_instruction = (
        "Given a chat history and the latest user question which might reference context in the chat history, "
        "formulate a standalone question which can be understood without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    rewrite_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", rewrite_instruction),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    return create_history_aware_retriever(chat_model, retriever, rewrite_prompt)
125 |
+
|
126 |
+
def create_qa_chain(chat_model):
    """Build the stuff-documents QA chain that forms the answer stage of the RAG pipeline."""
    system_text = (
        "You are an helpful assistant named 'Shield' and your task is to answer any questions related to AI Safety for the given context."
        "Use the following pieces of retrieved context to answer the question."
        "If you don't know the answer, just say that you don't know.\n\n"
        "{context}"
    )
    answer_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_text),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    return create_stuff_documents_chain(chat_model, answer_prompt)
143 |
+
|
144 |
+
|
145 |
+
def create_rag_chain(chat_model, retriever):
    """Compose history-aware retrieval with the QA chain into one retrieval chain."""
    return create_retrieval_chain(
        create_history_aware_retriever_self(chat_model, retriever),
        create_qa_chain(chat_model),
    )
149 |
+
|
150 |
+
|
151 |
+
def create_session_id():
    """Return a fresh random UUID4 string used to key per-session chat history."""
    return str(uuid.uuid4())
154 |
+
|
155 |
+
|
156 |
+
@cl.on_chat_start
async def start():
    """Initialise a chat session.

    Greets the user, creates a session id, connects to the remote Qdrant
    collection (documents are pre-indexed offline — see pre-processing.ipynb)
    and wires up the history-aware conversational RAG chain.
    """
    # cl.user_session.set("memory", conversation_buffer_memory)
    msg = cl.Message(content=welcome_message)
    await msg.send()

    # Create a session id
    session_id = create_session_id()
    cl.user_session.set("session_id", session_id)

    # Preserve chat history
    # NOTE(review): this memory object is created but never passed to the
    # chain below — history is actually kept in `store` via
    # RunnableWithMessageHistory. Presumably a leftover from an earlier
    # design; confirm before removing.
    conversation_buffer_memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=ChatMessageHistory(),
        return_messages=True,
    )

    # todo: if logged in user is admin then allow them to upload new pdfs.

    # # Embedding model
    # # embedding_model, dimension = get_embeddings_openai_text_3_large()
    # embedding_model, dimension = get_embeddings_snowflake_arctic_embed_l()
    # msg.content = "Embedding model loaded"
    # await msg.update()
    # cl.user_session.set("embedding_model", embedding_model)
    # cl.user_session.set("dimension", dimension)

    # # Pdf loader
    # pdf_loader = PDFLoaderWrapper(
    #     documents_to_preload, PDFLoaderWrapper.LoaderType.PYMUPDF
    # )
    # msg.content = "Embedding model loaded"
    # await msg.update()
    # cl.user_session.set("pdf_loader", pdf_loader)
    # documents = await pdf_loader.aload()

    # text_splitter = get_text_splitter("semantic", embedding_model)

    # chunked_docs = text_splitter.split_documents(documents)

    # vector_store = initialize_vectorstore(
    #     collection_name, embedding_model, dimension=dimension
    # )

    # vector_store = populate_vectorstore(vector_store, chunked_docs)

    # At session start we only connect to the already-populated collection.
    retriever = await connect_to_qdrant()

    rag_chain = create_rag_chain(chat_model, retriever)

    # Per-process map of session_id -> message history backing the chain.
    store = {}

    def get_session_history(session_id: str) -> BaseChatMessageHistory:
        # Lazily create a history object the first time a session is seen.
        if session_id not in store:
            store[session_id] = ChatMessageHistory()
        return store[session_id]

    conversational_rag_chain = RunnableWithMessageHistory(
        rag_chain,
        get_session_history,
        input_messages_key="input",
        history_messages_key="chat_history",
        output_messages_key="answer",
    )

    # Let the user know that the system is ready
    msg.content = msg.content + "\nReady to answer your questions!"
    await msg.update()

    cl.user_session.set("conversational_rag_chain", conversational_rag_chain)
|
227 |
+
|
228 |
+
|
229 |
+
@cl.on_message
async def main(message: cl.Message):
    """Handle one user message: run the conversational RAG chain and reply
    with the answer plus de-duplicated page citations as side elements.
    """
    session_id = cl.user_session.get("session_id")
    conversational_rag_chain = cl.user_session.get("conversational_rag_chain")

    response = await conversational_rag_chain.ainvoke(
        {"input": message.content},
        config={"configurable": {"session_id": session_id},
                "callbacks": [cl.AsyncLangchainCallbackHandler()]},
    )
    answer = response["answer"]

    source_documents = response["context"]
    text_elements = []
    unique_pages = set()

    if source_documents:

        for source_idx, source_doc in enumerate(source_documents):
            # Fix: not every loader guarantees a 'page' metadata key; fall
            # back to "NA" instead of raising KeyError and losing the reply.
            page_number = source_doc.metadata.get('page', "NA")
            page = f"Page {page_number}"
            text_element_content = source_doc.page_content
            text_element_content = text_element_content if text_element_content != "" else "No Content"
            # Attach at most one element per page to avoid duplicate citations.
            if page not in unique_pages:
                unique_pages.add(page)
                text_elements.append(cl.Text(content=text_element_content, name=page))
        source_names = [text_el.name for text_el in text_elements]

        if source_names:
            answer += f"\n\n Sources:{', '.join(source_names)}"
        else:
            answer += "\n\n No sources found"

    await cl.Message(content=answer, elements=text_elements).send()
|
267 |
+
|
268 |
+
if __name__ == "__main__":
    # Allow `python app.py` for local development; in the Docker image the
    # app is launched via the `chainlit run` CLI instead.
    from chainlit.cli import run_chainlit

    run_chainlit(__file__)
|
chainlit.md
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
# Welcome to Chat with Your Text File
|
2 |
+
|
3 |
+
With this application, you can chat with an uploaded text file that is smaller than 2MB!
|
embedding_model.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import tiktoken
|
3 |
+
import os
|
4 |
+
from langchain_openai import OpenAIEmbeddings
|
5 |
+
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
6 |
+
import torch
|
7 |
+
from transformers import AutoModel, AutoTokenizer
|
8 |
+
from transformers import AutoModel, AutoTokenizer
|
9 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
10 |
+
|
11 |
+
# def get_embeddings_model_bge_base_en_v1_5():
|
12 |
+
# model_name = "BAAI/bge-base-en-v1.5"
|
13 |
+
# model_kwargs = {'device': 'cpu'}
|
14 |
+
# encode_kwargs = {'normalize_embeddings': False}
|
15 |
+
# embedding_model = HuggingFaceBgeEmbeddings(
|
16 |
+
# model_name=model_name,
|
17 |
+
# model_kwargs=model_kwargs,
|
18 |
+
# encode_kwargs=encode_kwargs
|
19 |
+
# )
|
20 |
+
# return embedding_model
|
21 |
+
|
22 |
+
# def get_embeddings_model_bge_en_icl():
|
23 |
+
# model_name = "BAAI/bge-en-icl"
|
24 |
+
# model_kwargs = {'device': 'cpu'}
|
25 |
+
# encode_kwargs = {'normalize_embeddings': False}
|
26 |
+
# embedding_model = HuggingFaceBgeEmbeddings(
|
27 |
+
# model_name=model_name,
|
28 |
+
# model_kwargs=model_kwargs,
|
29 |
+
# encode_kwargs=encode_kwargs
|
30 |
+
# )
|
31 |
+
# return embedding_model , 4096
|
32 |
+
|
33 |
+
# def get_embeddings_model_bge_large_en():
|
34 |
+
# model_name = "BAAI/bge-large-en"
|
35 |
+
# model_kwargs = {'device': 'cpu'}
|
36 |
+
# encode_kwargs = {'normalize_embeddings': False}
|
37 |
+
# embedding_model = HuggingFaceBgeEmbeddings(
|
38 |
+
# model_name=model_name,
|
39 |
+
# model_kwargs=model_kwargs,
|
40 |
+
# encode_kwargs=encode_kwargs
|
41 |
+
# )
|
42 |
+
# return embedding_model
|
43 |
+
|
44 |
+
def get_embeddings_openai_text_3_large():
    """Return OpenAI's text-embedding-3-large embedder with its vector size (3072)."""
    model = OpenAIEmbeddings(model="text-embedding-3-large")
    return model, 3072
|
48 |
+
|
49 |
+
# def get_embeddings_snowflake_arctic_embed_l():
|
50 |
+
# current_dir = os.path.dirname(os.path.realpath(__file__))
|
51 |
+
# model_name = "Snowflake/snowflake-arctic-embed-l"
|
52 |
+
# tokenizer = AutoTokenizer.from_pretrained(f"{current_dir}/cache/tokenizer/{model_name}")
|
53 |
+
# model = AutoModel.from_pretrained(f"{current_dir}/cache/model/{model_name}")
|
54 |
+
# return model,1024
|
55 |
+
|
56 |
+
def get_embeddings_snowflake_arctic_embed_l():
    """Return the Snowflake arctic-embed-l HuggingFace embedder with its vector size (1024)."""
    model = HuggingFaceEmbeddings(model_name="Snowflake/snowflake-arctic-embed-l")
    return model, 1024
|
pdfloader.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from enum import Enum
|
2 |
+
from typing import List
|
3 |
+
from langchain_community.document_loaders import PyMuPDFLoader
|
4 |
+
from langchain_core.documents import Document
|
5 |
+
import asyncio
|
6 |
+
|
7 |
+
class PDFLoaderWrapper():
    """Async wrapper that loads one or many PDF paths/URLs into LangChain Documents."""

    class LoaderType(str, Enum):
        # Supported parsing backends; currently only PyMuPDF.
        PYMUPDF = "pymupdf"

    def __init__(self, file_path: str | List[str], loader_type: LoaderType = LoaderType.PYMUPDF):
        """Store the file path(s) and the chosen backend.

        Args:
            file_path: A single path/URL or a list of them; normalised to a list.
            loader_type: Parsing backend to use.
        """
        self.file_path = file_path if isinstance(file_path, list) else [file_path]
        self.loader_type = loader_type

    async def aload(self) -> List[Document]:
        """Load every configured file and return the concatenated documents.

        Individual files that fail to load are reported and skipped
        (best effort) so one bad document does not abort the preload.

        Raises:
            ValueError: If an unsupported loader type was configured.
                (Previously every file was silently skipped and an empty
                list returned.)
        """
        if self.loader_type != self.LoaderType.PYMUPDF:
            raise ValueError(f"Unsupported loader type: {self.loader_type}")
        all_docs: List[Document] = []
        for file_path in self.file_path:
            try:
                loader = PyMuPDFLoader(file_path)
                docs = await loader.aload()
                all_docs.extend(docs)
            except Exception as e:
                # Best-effort: keep going even if one document fails.
                print(f"Error loading file {file_path}: {e}")
                continue
        return all_docs
|
27 |
+
|
pre-processing.ipynb
ADDED
@@ -0,0 +1,595 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"% pip install numpy==1.26.4 \\\n",
|
10 |
+
"openai==1.44.1 \\\n",
|
11 |
+
"qdrant-client==1.11.2 \\\n",
|
12 |
+
"langchain==0.3.0 \\\n",
|
13 |
+
"langchain-text-splitters==0.3.0 \\\n",
|
14 |
+
"langchain-community==0.3.0 \\\n",
|
15 |
+
"langchain_experimental \\\n",
|
16 |
+
"langchain_qdrant \\\n",
|
17 |
+
"langchain_openai \\\n",
|
18 |
+
"pypdf==4.3.1 \\\n",
|
19 |
+
"PyMuPDF==1.24.10 \\\n",
|
20 |
+
"pymupdf4llm \\\n",
|
21 |
+
"sentence_transformers \\\n",
|
22 |
+
"langchain_huggingface "
|
23 |
+
]
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"cell_type": "code",
|
27 |
+
"execution_count": 1,
|
28 |
+
"metadata": {},
|
29 |
+
"outputs": [],
|
30 |
+
"source": [
|
31 |
+
"BOR_FILE_PATH = \"https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf\"\n",
|
32 |
+
"NIST_FILE_PATH = \"https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf\"\n",
|
33 |
+
"SMALL_DOC = \"https://arxiv.org/pdf/1908.10084\" \n",
|
34 |
+
"documents_to_preload = [\n",
|
35 |
+
" BOR_FILE_PATH,\n",
|
36 |
+
" NIST_FILE_PATH\n",
|
37 |
+
" # SMALL_DOC\n",
|
38 |
+
"]\n"
|
39 |
+
]
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"cell_type": "code",
|
43 |
+
"execution_count": 2,
|
44 |
+
"metadata": {},
|
45 |
+
"outputs": [
|
46 |
+
{
|
47 |
+
"name": "stderr",
|
48 |
+
"output_type": "stream",
|
49 |
+
"text": [
|
50 |
+
"/Users/jeevan/Documents/Learnings/ai-engineering-bootcamp/AIE4/AIE4/mid-term/ai-safety-chatapp/venv/lib/python3.11/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:13: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
51 |
+
" from tqdm.autonotebook import tqdm, trange\n"
|
52 |
+
]
|
53 |
+
}
|
54 |
+
],
|
55 |
+
"source": [
|
56 |
+
"# Embedding model - snowflake-arctic-embed-l\n",
|
57 |
+
"from langchain_huggingface import HuggingFaceEmbeddings\n",
|
58 |
+
"\n",
|
59 |
+
"model_name = \"Snowflake/snowflake-arctic-embed-l\"\n",
|
60 |
+
"embedding_model = HuggingFaceEmbeddings(model_name=model_name)"
|
61 |
+
]
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"cell_type": "code",
|
65 |
+
"execution_count": 3,
|
66 |
+
"metadata": {},
|
67 |
+
"outputs": [],
|
68 |
+
"source": [
|
69 |
+
"from pdfloader import PDFLoaderWrapper\n",
|
70 |
+
"from langchain_experimental.text_splitter import SemanticChunker\n",
|
71 |
+
"\n",
|
72 |
+
"\n",
|
73 |
+
"pdf_loader = PDFLoaderWrapper(\n",
|
74 |
+
" documents_to_preload, PDFLoaderWrapper.LoaderType.PYMUPDF\n",
|
75 |
+
")\n",
|
76 |
+
"documents = await pdf_loader.aload()\n",
|
77 |
+
"\n",
|
78 |
+
"text_splitter = SemanticChunker(embedding_model, breakpoint_threshold_type=\"percentile\",breakpoint_threshold_amount=90)\n",
|
79 |
+
"\n",
|
80 |
+
"chunked_docs = text_splitter.split_documents(documents)\n"
|
81 |
+
]
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"cell_type": "code",
|
85 |
+
"execution_count": 4,
|
86 |
+
"metadata": {},
|
87 |
+
"outputs": [],
|
88 |
+
"source": [
|
89 |
+
"import os\n",
|
90 |
+
"import getpass\n",
|
91 |
+
"\n",
|
92 |
+
"os.environ[\"QDRANT_API_KEY\"] = getpass.getpass(\"Enter Your Qdrant API Key: \")"
|
93 |
+
]
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"cell_type": "code",
|
97 |
+
"execution_count": 5,
|
98 |
+
"metadata": {},
|
99 |
+
"outputs": [
|
100 |
+
{
|
101 |
+
"data": {
|
102 |
+
"text/plain": [
|
103 |
+
"['eddeba090cf64372b937fdeeb4a66a05',\n",
|
104 |
+
" '04d716b884124244876b0dd6bba15b4e',\n",
|
105 |
+
" 'db68b0d183214d95a3b8be26f9a3072f',\n",
|
106 |
+
" 'cb21583a20c748aa898821c475825aa1',\n",
|
107 |
+
" '503b0d1da1354b3dba9903d889fa1dcf',\n",
|
108 |
+
" 'f5db16617a4b4ed69cf46c7739ce1705',\n",
|
109 |
+
" '1e1532cacc434b988de2039a9b07bd95',\n",
|
110 |
+
" 'eb62a186469e4d6a860ed9f2c32264cf',\n",
|
111 |
+
" 'e621542bdc944c35adad13321669a782',\n",
|
112 |
+
" '0a8ad7cbf78b488bbcb19bc046f991ea',\n",
|
113 |
+
" 'e24af2031ccc4b86afc5c5b868ce0875',\n",
|
114 |
+
" '6eed4c2596e14f9b8fbcad5a16682bf7',\n",
|
115 |
+
" '0adb7d6c0ed641fdb1aa7c38b44f205e',\n",
|
116 |
+
" '69559e35c6284d1a9711aa0d67cf3663',\n",
|
117 |
+
" '1ce9c69941e446aa83325edd2a43e6af',\n",
|
118 |
+
" '67472b47adf3441dbc315be9369b5bac',\n",
|
119 |
+
" '0470ecb0cb3c48e49d2aeabf8c7b8764',\n",
|
120 |
+
" '603575c0eff4430f914b1ff491c6cff0',\n",
|
121 |
+
" 'cbff97eea69343c4b2073c3264337c82',\n",
|
122 |
+
" '9f0a9425534041e5a117f546d5aa9e0f',\n",
|
123 |
+
" '0f551ca5bb74476d8bed3183315cb687',\n",
|
124 |
+
" 'd79db53763084978bffeb3d2a9ba888e',\n",
|
125 |
+
" '4adcbf0531504ec3a7672b6ac5e88695',\n",
|
126 |
+
" '5fa8f04f748d4fcc8bdb0e803ca38053',\n",
|
127 |
+
" '855328fce2db47ec9a128d8eab41b3b8',\n",
|
128 |
+
" '9be9afb0e477463db19d507179e18436',\n",
|
129 |
+
" '9356ffe511ae4f659f079895695a67f3',\n",
|
130 |
+
" '26b1283ce9064aab9e6399db9371e542',\n",
|
131 |
+
" 'f9e92a690fcf4a58a978abd173f39503',\n",
|
132 |
+
" 'd0fd1190717046d1a02d7c21f15e982c',\n",
|
133 |
+
" '48ddfed6388e48c2bbc260d77d2ea7b2',\n",
|
134 |
+
" '7391be7bceba44d7974ff05604422ff6',\n",
|
135 |
+
" 'a1645bb4440f4a2eb1ff3e5a818e1857',\n",
|
136 |
+
" '9265bbde2d9549c5aefc2da778943bb1',\n",
|
137 |
+
" 'c0e85a23783e4eaab735bf4906c5c4ee',\n",
|
138 |
+
" '2a8e0ade33134a16b696a6f274815682',\n",
|
139 |
+
" '1c4df2e13e314e419286bff9441848ca',\n",
|
140 |
+
" '74cd8670d0e748c1883497732ec03814',\n",
|
141 |
+
" '7cfad9000dd7406f86e12c740d5ef6eb',\n",
|
142 |
+
" '0ee580a675574c42ab6f2a191279c7e1',\n",
|
143 |
+
" 'f24963b680ae448eb3a5a98a629207c8',\n",
|
144 |
+
" '2d85b61d77244dfa8d15d665ff354bb4',\n",
|
145 |
+
" 'f38706bb1fe34e6db224c8fde5bf29b4',\n",
|
146 |
+
" 'e0a651dab4f14ab8bb27f886acb6f81d',\n",
|
147 |
+
" '8e41d96e0c40420fa79ce5b9460eea90',\n",
|
148 |
+
" '0f56a8ddaf7a41828b98e0ad531b571c',\n",
|
149 |
+
" '03d42d339edc4dc9bd0081b619f55ecc',\n",
|
150 |
+
" 'ba42fded48a249c694bf9a9b2e94c146',\n",
|
151 |
+
" 'c36879e922d745ce824c3c6c5391c671',\n",
|
152 |
+
" 'cf782c0ed9d3430a884e8ecf11fc4628',\n",
|
153 |
+
" '85334a4e52e04aa8b364131dd90857f9',\n",
|
154 |
+
" '796507d959704177af9fda3d8b5429a4',\n",
|
155 |
+
" 'd614bad2456842e7b8f3e92de44d68ec',\n",
|
156 |
+
" '6042cdd6a45a42228e8fa2bd909f3b53',\n",
|
157 |
+
" '6a0fbab037cd4d079d7abdc0694392b5',\n",
|
158 |
+
" '9bb6e193fe1f47a5a8efb433ca026586',\n",
|
159 |
+
" 'af5664a283ca4262a22cb706ba0866ee',\n",
|
160 |
+
" '868ea113f0b2470eb85bc7fbf7355ac7',\n",
|
161 |
+
" '13d0c504b19d4794b7f4e0b0d0ecb41b',\n",
|
162 |
+
" 'd24f3f8f3967458689d5315992b51f44',\n",
|
163 |
+
" '363d7278858142e1a9a8580dff2469b1',\n",
|
164 |
+
" 'c9f1bb8520344d5fb6cf7db77986c326',\n",
|
165 |
+
" '8a738ce3bafd4cdf94a2984e20016146',\n",
|
166 |
+
" '904f9cb924be4e9aa1d8d01e7c49e5c2',\n",
|
167 |
+
" '8dbdb46493964a15af5a7550442e076d',\n",
|
168 |
+
" '957d5363e2904646999b45fca59c10df',\n",
|
169 |
+
" '2f519f70fee240eaaf5b9cf19dc9c058',\n",
|
170 |
+
" 'afc3204b02c64c7e9a7bd10d4000acac',\n",
|
171 |
+
" 'df1d427abe9348a0baf4561b4fab104b',\n",
|
172 |
+
" 'ac96b17eb3f343c983120a9f4c5577d9',\n",
|
173 |
+
" '47f1f02b36d441aa9db960fa593323b0',\n",
|
174 |
+
" 'a604fa5fb1344139a6149836d6bc20dd',\n",
|
175 |
+
" '574857debc5c42b49b341a04f41c0d47',\n",
|
176 |
+
" 'ca59bb9be5e64470b226b36b78f7a11c',\n",
|
177 |
+
" 'f590db78eceb4c22b0c224b77ffa03b5',\n",
|
178 |
+
" 'bcbcbe3628214fd89b0f5f0c008700b8',\n",
|
179 |
+
" '58cf91d633794df09036a3246d44d3f4',\n",
|
180 |
+
" '58788414f5314fb3b9fbbe9d3b82ce40',\n",
|
181 |
+
" '3eb765a9b00a458b9c352f9ecd66069c',\n",
|
182 |
+
" 'c8ae3298d8b64ac3ab9f45dc6e0b1712',\n",
|
183 |
+
" '4d4b558e40994a919700b19d6ce3fb78',\n",
|
184 |
+
" '2dec1ed15f52459fada667c171b6cd15',\n",
|
185 |
+
" '70b7b33f7b3f4a89b22bdc587e51408f',\n",
|
186 |
+
" '5301362517fb475b916fa4dee82c0ece',\n",
|
187 |
+
" 'ac25b4c2668d44bea14c8299ddd17c92',\n",
|
188 |
+
" 'f0509d5035c14fa2aa6f91181134b0f3',\n",
|
189 |
+
" 'd53f9f0dc38149ffa3ea8b0aa6bbf849',\n",
|
190 |
+
" 'a2962de10d784220a72a380e8d409786',\n",
|
191 |
+
" 'b25e3cd5b3b148aba2627d88b93728d8',\n",
|
192 |
+
" 'a344f02041bc45e984452fe693bd26a6',\n",
|
193 |
+
" '53ec4ac26d6342f498fee283a5235d3f',\n",
|
194 |
+
" '323f6f0dd916451c9231730257d9b166',\n",
|
195 |
+
" '62f04a85af0c4b8aacf2bde02d0bbed3',\n",
|
196 |
+
" 'aa840da6722e458caeac99cc503ddd10',\n",
|
197 |
+
" '7d4fe147f0984ffc8d2f59ce86d5312a',\n",
|
198 |
+
" '2fee83e32198453986a73a7e3311bb14',\n",
|
199 |
+
" '9757507874714b57b7ff6c6a51aef718',\n",
|
200 |
+
" 'c7f42a04c66c4ab28c2c7c18ed33700a',\n",
|
201 |
+
" '49bd88e28a244910b888e79af4039282',\n",
|
202 |
+
" '420985e4c15c4c18aad851e95bdbebd8',\n",
|
203 |
+
" 'aaf353cf3b3b4e2c9709895271f55649',\n",
|
204 |
+
" '7a52229e16944616a1108caa86fc815d',\n",
|
205 |
+
" '629098069e2c4d09b845065c653a206d',\n",
|
206 |
+
" 'b37f1984a07e480b957057932b5e5f4d',\n",
|
207 |
+
" '7dfce2615e0344eb8147a10c57ef790e',\n",
|
208 |
+
" '07a1d0a768384b4aa77f986faa02a111',\n",
|
209 |
+
" 'eeeea24632f6475a9248613e6f6b7273',\n",
|
210 |
+
" '5ffbfc5623db434d807438c8a5c2a3db',\n",
|
211 |
+
" '83bf4a99c26f4374b39eb5685b6029a8',\n",
|
212 |
+
" 'd2176d5ac1934206ac13a25e3d3b4af9',\n",
|
213 |
+
" '8929fa8903a748748f51bb3e55e3aac2',\n",
|
214 |
+
" '21cedcf7d73a4a3cbb10319e64ebf145',\n",
|
215 |
+
" 'c1ba5a05221542838764f923be43d9e2',\n",
|
216 |
+
" '4d07a54bf62d4a929bec86ba278fa935',\n",
|
217 |
+
" '0cfbdccc2fb24637bd061e456996aad1',\n",
|
218 |
+
" '403ed966e49f44deb05101c4fa6ae485',\n",
|
219 |
+
" '073d20ce62514518b0a7ec495b317fef',\n",
|
220 |
+
" 'cbde5f3792b04bef8632c91906fc7146',\n",
|
221 |
+
" 'cb5bc23be3cf4ae389c346526c0323ae',\n",
|
222 |
+
" '7ca053a641394108b41dd8a91b1347df',\n",
|
223 |
+
" '027ad2187b8d4a80b84e95c59809d111',\n",
|
224 |
+
" 'bc49807d6f8c4f87a923366f83586ed8',\n",
|
225 |
+
" '41312c394807424ab191bf3bbe40416f',\n",
|
226 |
+
" '8390108ed60e465da899e0a2a688efaf',\n",
|
227 |
+
" '72353287c709410fafbb8096dfdf72c8',\n",
|
228 |
+
" 'b2902f5c90894246966ed217e6fb9574',\n",
|
229 |
+
" '10b79371e2364b0a9773fcacdc3daf05',\n",
|
230 |
+
" '3625bc3a69564ea39a28ae9bf70c4614',\n",
|
231 |
+
" '9177954079094aeaba7884bd0dffcc06',\n",
|
232 |
+
" '394fd830247141248a7454aea3aa489d',\n",
|
233 |
+
" '96877971f66644e6b8ab5767d4119a71',\n",
|
234 |
+
" 'a7e88bf685c14ba48c820f106b221881',\n",
|
235 |
+
" '216ec8e4e3fe439998fafe51e826db5f',\n",
|
236 |
+
" '1b66efa55a114c2985cc5236fb0b0742',\n",
|
237 |
+
" 'aa991a0cb36d4a75967c5e591bc08b97',\n",
|
238 |
+
" '6a6ce074204a48bf95032ae16c7db23c',\n",
|
239 |
+
" 'ff8e1f9be7804049af8e7c41974e4c5a',\n",
|
240 |
+
" '31a71b7f81674adf8c93d65a2d2eec63',\n",
|
241 |
+
" '9cc71e212bb14b909c79649596170c2a',\n",
|
242 |
+
" '9b042e1551d747bf9070b0b4ba8c0212',\n",
|
243 |
+
" '65f8e2ed927d41658069977d684be32e',\n",
|
244 |
+
" '6ed2ac368f3b4c04b2898fb6be21e98b',\n",
|
245 |
+
" 'f0924724eb644c1880e208dc4fab03e1',\n",
|
246 |
+
" '721f66c77001440b9d952fc8f6f4c56f',\n",
|
247 |
+
" '4843d3717aaf47d4b34e1f3a3cba498d',\n",
|
248 |
+
" '6465a722293d4645afeaaa3bd132d63e',\n",
|
249 |
+
" 'd26331770ecb486c9a5335da6bb0740d',\n",
|
250 |
+
" '8c7894b2ea4c421ba2937fdb4e34be86',\n",
|
251 |
+
" 'b20695156c6d42f4828e70d10b6f0277',\n",
|
252 |
+
" '6d64009de2774beab0f7c18424bb7327',\n",
|
253 |
+
" '2765c0b9b2a24917ab36b119ca8eef84',\n",
|
254 |
+
" 'bcf0337addee484882291f8cf9de408b',\n",
|
255 |
+
" '92b1f5a04ef340f486dd31b4eceb4a7c',\n",
|
256 |
+
" '512bfff959224382a066921c89bb735b',\n",
|
257 |
+
" 'e0558260a83c465eacff77ed9cb7308a',\n",
|
258 |
+
" '4074348ca89546bda70812b37b92fe14',\n",
|
259 |
+
" '21633e3b024e45ac998d58410d16e51d',\n",
|
260 |
+
" '0797be77777644129b6e907d0fb897bc',\n",
|
261 |
+
" 'd5d49a75b7a846e1b020bc3e49d62da7',\n",
|
262 |
+
" '10941d6aa34c45bbb0168578a486db2c',\n",
|
263 |
+
" '87b58399cb2a45c781ecf4d8e2f4e307',\n",
|
264 |
+
" 'e9d77e36c77b4de1bec4ae7951e38511',\n",
|
265 |
+
" '317c0b2c1d7d4652be7d51a1512e7f3f',\n",
|
266 |
+
" 'a2c77fe449e242c29ee83a43f28f8c58',\n",
|
267 |
+
" '6f8d84d606e94f66a9bcbd82941e5133',\n",
|
268 |
+
" '38aefb805fdf40f287e505154560f7b4',\n",
|
269 |
+
" '8e720f4f5fde454d94d941cc8c027d8e',\n",
|
270 |
+
" '33dda74031614e2dbae31283d2674e65',\n",
|
271 |
+
" '68b4b01b9bb2441cad5182ac4d0a8c01',\n",
|
272 |
+
" '97bcb73c830b4429b446e3ab718de1b1',\n",
|
273 |
+
" 'c510dce650ec469e99b4653b80284d64',\n",
|
274 |
+
" '9ed79818fd9d406790961038e968d87c',\n",
|
275 |
+
" 'b7fc935f6b4f4922ba2c9695f1c6f253',\n",
|
276 |
+
" '9c5c6635c12a48e4bf50a5823aa13bec',\n",
|
277 |
+
" 'ea3882dac1c345d4a5f24589b64d273b',\n",
|
278 |
+
" 'd77d8d6861104592aa4849b6a6e21ded',\n",
|
279 |
+
" '260626f4c965432bb342768026f132e5',\n",
|
280 |
+
" '56e3ecd4eb64413a9a943bffe1ad86af',\n",
|
281 |
+
" '6638ad84fef94984a6082915855935c3',\n",
|
282 |
+
" '37eb01f2393e4402acc1547a954d2b7d',\n",
|
283 |
+
" 'aeffe25827314271b00284704a245dc1',\n",
|
284 |
+
" '922a3f98218c4dc7b3a1a574e5bfab41',\n",
|
285 |
+
" 'adf1623b0f2e490b8f5f62cc15e3fd03',\n",
|
286 |
+
" 'aaebb687fb1945009cbe473dae7b48e6',\n",
|
287 |
+
" '5be6cd8b0f224c8d94e92b32b35f2d46',\n",
|
288 |
+
" '4f9ce579447a4bd4a76d6288cf2652e2',\n",
|
289 |
+
" 'c0456f7c254346399654aa02ba1796d2',\n",
|
290 |
+
" '32cafe4487d04f498fd721b105d2a1ca',\n",
|
291 |
+
" 'b321bdeb2d9e4cb7a017aed9cedee7d6',\n",
|
292 |
+
" 'b8c3466d506842e1af7ac94deee57b81',\n",
|
293 |
+
" '93a3dba99695448f8123c13bb8b779c8',\n",
|
294 |
+
" '1a22fc8c63164c12bc2e6dcc1e558f4c',\n",
|
295 |
+
" 'bde4793c8ed242dcbec5da1cf329912b',\n",
|
296 |
+
" '1c45a001e9894b4ab2e8ba13db0c47c6',\n",
|
297 |
+
" '0971bf69ee1043c69d3126c9e056d7a8',\n",
|
298 |
+
" '2c8864207467424e98a32256e13d78e9',\n",
|
299 |
+
" 'a5712b28bc7244b69b2f1f4bd203383c',\n",
|
300 |
+
" 'd14300d16e404c8087980ea322d7c923',\n",
|
301 |
+
" '96af51f4384a472c8ed57f5fd85d6d0f',\n",
|
302 |
+
" 'dc43cafd35384e169a90ff6a4dd7e603',\n",
|
303 |
+
" 'ee49f93bc1924ee895f2ed1cd9dd5807',\n",
|
304 |
+
" '883e971384d74702894bd84742e9e01d',\n",
|
305 |
+
" '4cb32ece2cc1453c8b283966d77a573f',\n",
|
306 |
+
" '87da85de0eb24c0291409b33f20f4fe0',\n",
|
307 |
+
" '71d0f05cdd8e49c1b12fa6c4290e19b9',\n",
|
308 |
+
" '5787fd6f5f934396bdba27c0982c69fd',\n",
|
309 |
+
" 'ac3c132b8fa646b998bce3f8e816bfae',\n",
|
310 |
+
" '9e17210401cb4d0998520db1313023c7',\n",
|
311 |
+
" 'd24cbb397d16417290039b255903c915',\n",
|
312 |
+
" '3ba47099629d48249da0ff0633c2547b',\n",
|
313 |
+
" 'f07dce036bf24a7e88af1a021fc97d42',\n",
|
314 |
+
" '165045264b9b4c2881f675d51e66a2a6',\n",
|
315 |
+
" 'feb749b3910f416597e18b6573f5919f',\n",
|
316 |
+
" '022e7607e8ba428b9213f763e032a895',\n",
|
317 |
+
" '5ada3caafac145ee946015d373d46cb2',\n",
|
318 |
+
" '9895042a53e547e1a4df189e719783c8',\n",
|
319 |
+
" '73c1c19ba70146e8a7703b0bdf3d29c8',\n",
|
320 |
+
" '9f5714b0d63547a3ac6b62c0a8795af3',\n",
|
321 |
+
" '754785ee7b6d44f5a474940eade68323',\n",
|
322 |
+
" '93aa78a59c044242a3c774d66eb0ce38',\n",
|
323 |
+
" '19fbf46edf5b42ea98ce35f4becb22c9',\n",
|
324 |
+
" '88f6394d368c4a4dbaa91f2ca3f06e83',\n",
|
325 |
+
" '2e8e6595709a4e77a0656833e14de87a',\n",
|
326 |
+
" '83a8ea55e3374628a8d2817f949f4a33',\n",
|
327 |
+
" 'bca22c47143c46dcbc350e393f43a260',\n",
|
328 |
+
" '32dfd090a865443cafc87324f0f583bb',\n",
|
329 |
+
" '069a75a888bd43ca9b425f76eba236ba',\n",
|
330 |
+
" 'e4240b6768a24309960f6671510e2b97',\n",
|
331 |
+
" '29068f3ec43b4aabb78f49b93bbc0704',\n",
|
332 |
+
" '25f21b35cbdf49729887854567e8f093',\n",
|
333 |
+
" 'bb01bcd9b6f9464fb19296279cd228ce',\n",
|
334 |
+
" '6c7c0873830241c5acfc7a5ff68c3e7e',\n",
|
335 |
+
" 'fa6aab7a0587489f8218465112ca1027',\n",
|
336 |
+
" '0a3d9574f789460c8ac54e69aa5a896d',\n",
|
337 |
+
" '9cdf3f1556964e0fb95dce551b9d80d0',\n",
|
338 |
+
" 'e02b0fea19f24f40b9e21aa065aec5e2',\n",
|
339 |
+
" '02b5b6915447495a9b40d28631e20621',\n",
|
340 |
+
" 'fd7e17383d434aff8e3c1578381a2be5',\n",
|
341 |
+
" 'f54d6b530dc44c81861a3421ddf85477',\n",
|
342 |
+
" '31016f3fe8f44f2a95ec885eea02a1a0',\n",
|
343 |
+
" '89a6bb3ef75045a8a13d0e1717536a7b',\n",
|
344 |
+
" '10ad95ca700f4d8095230aa7ccacdf45',\n",
|
345 |
+
" 'fa02f1851c694e98b9fc8b40f0789fca',\n",
|
346 |
+
" 'e307d59d2bda4bb498810bd088d58c26',\n",
|
347 |
+
" '09531751f0ad46c990d830f784d49920',\n",
|
348 |
+
" '463a5cc86f014d9b9c934019f8a6a3b0',\n",
|
349 |
+
" 'c6da3d870a664ae5a58d5b0f24f86afa',\n",
|
350 |
+
" '0347b993d5104dbe9c58cd87ec0e0920',\n",
|
351 |
+
" 'baf4197276f34479bf254bbb95457a78',\n",
|
352 |
+
" '4dc1052154984cdfbab206cb21389316',\n",
|
353 |
+
" '9201ed1c46b842fbb57c14abf6dc681c',\n",
|
354 |
+
" '67d60aae5043431f9296735656c1c578',\n",
|
355 |
+
" '96b88d3220c1438584ce83d20e7126d5',\n",
|
356 |
+
" 'cb24d605797f42b1956bdf34121734b8',\n",
|
357 |
+
" '6d180b3524eb43f5b1629e0edbeed005',\n",
|
358 |
+
" '422ab68c4bb44d9485a7e891de8d8bb2',\n",
|
359 |
+
" '2efea4ce3a1f489490e0eb277e2f9b51',\n",
|
360 |
+
" 'f39aeee6512644f78ae848bd0b17925c',\n",
|
361 |
+
" '922ba55579cc40a9ae6de8ff4d8f2fcb',\n",
|
362 |
+
" 'c94dce24f53b4fc597b0a9844cb9f2d1',\n",
|
363 |
+
" '92e5324c55f54c74bfeeae5a5ee472d0',\n",
|
364 |
+
" 'da29c33c41d54889926938c763559184',\n",
|
365 |
+
" '24175ca064f94ab18ffd755537fbeb08',\n",
|
366 |
+
" '737a9a163ab04803ab4811b083e84a7f',\n",
|
367 |
+
" 'c48c7210f7e04f42ba749352a90a2e14',\n",
|
368 |
+
" '33d1da5d12e84f039af98e53805463db',\n",
|
369 |
+
" '168384cba6a94e7ab9801b98eb25a83e',\n",
|
370 |
+
" '216c155d1910410e9ff3e2981305b0a1',\n",
|
371 |
+
" '9d2baad2bf734ec395e388832f375428',\n",
|
372 |
+
" '8f427298554343db9afef5f0ca04508f',\n",
|
373 |
+
" '91d16d31d0c84742ac7b410d634a77b4',\n",
|
374 |
+
" '49eead9aa61346fc8c20e6b320968f9c',\n",
|
375 |
+
" '2726ba58fdca4fab8b3d504088f5bb16',\n",
|
376 |
+
" '7cbfe53e55d94c9eab33049257b5b996',\n",
|
377 |
+
" '1829f7c9c7f64741a02e66cf07d7e4ca',\n",
|
378 |
+
" 'dc51c1d3980a41be85154cb81492f0d8',\n",
|
379 |
+
" '30babae9382a4204b65dfcea45b94111',\n",
|
380 |
+
" 'b3cf2c85a1aa43369f0d8bd3d1148214',\n",
|
381 |
+
" '8e66f9797e9542df8401cbeba12bf576',\n",
|
382 |
+
" 'e158952bb2a444b3b0137cc9ed09b2b3',\n",
|
383 |
+
" 'cc424f6c708d46c3b6ff0ccc532256a8',\n",
|
384 |
+
" '9f15d95cc0484a8b84f52c4e6aa6a3d1',\n",
|
385 |
+
" 'dd5123a7824b41fcbf9553691f4f8ce4',\n",
|
386 |
+
" '0c568b98747f4deda37a886d8176eaef',\n",
|
387 |
+
" '71b8843b17914ae2926ab5f7cc6c2afc',\n",
|
388 |
+
" '7123c69c56fc4ca79e510fe962fda11f',\n",
|
389 |
+
" '0662e249e7e846f7a34cdfb6b0ab97a4',\n",
|
390 |
+
" '37614a4fa3bc4b6892ed640136432595',\n",
|
391 |
+
" 'ba977631881b4e7887541f304cbdbf84',\n",
|
392 |
+
" 'a9e9604bbe8546db8379df385c6775bb',\n",
|
393 |
+
" '29cbaedfdc4842be8af5ef11fb0ae880',\n",
|
394 |
+
" 'fb96faf7d811465197ecf0bebb5ef7af',\n",
|
395 |
+
" 'a386736ae3e34fd8810f61554daff3a8',\n",
|
396 |
+
" '3322d87785f74f7bb03fff6184a88959',\n",
|
397 |
+
" '8d56b24bcce647ef92bcea0b9e730535',\n",
|
398 |
+
" '9df83b49b7b347e4880663e59fd15a4c',\n",
|
399 |
+
" 'e6beb25ea09243edab2d0b917d4ed658',\n",
|
400 |
+
" 'b788547e806548e4a6c0be68c37783c8',\n",
|
401 |
+
" 'd0cc604bb0aa47158eda39e6ea990c9a',\n",
|
402 |
+
" 'fa8ea860f8604a78a544a21ff65a9fa6',\n",
|
403 |
+
" '73efff02cfa3420dbc5d8412282093e4',\n",
|
404 |
+
" '8a891d866e8f41839dfa04dcfab7704d',\n",
|
405 |
+
" 'af9020b82fcb4f7dac6466a661c26915',\n",
|
406 |
+
" '10ea6f99054b4844bb42258530fb0058',\n",
|
407 |
+
" 'c4a6866e6ce84c468caf7cdb6e6d535b',\n",
|
408 |
+
" 'b0c1794f0f7f4f7c8fd21ab6598eccac',\n",
|
409 |
+
" 'ac530d4f83ee4c418e52245ab06630c2',\n",
|
410 |
+
" '860d9613fdfb4b969cb5dec21b29be06',\n",
|
411 |
+
" '220bd37a37ca4f259844deb952c32245',\n",
|
412 |
+
" '2eaa29f0eb34476fa3f67669e3da5b62',\n",
|
413 |
+
" '550db6a68b394795926cb1faaa1e0a52',\n",
|
414 |
+
" 'e86011e86a524ee8bf17f674fbb687e3',\n",
|
415 |
+
" '7d7b1c4d811d42d4aa80f5582c64f9ac',\n",
|
416 |
+
" '5a4c8245a6394f7db6562a9324683a03',\n",
|
417 |
+
" '96aa6bdf11964b2cb2531bba32f34249',\n",
|
418 |
+
" '168db84dd98248a88d5de07aa13496ae',\n",
|
419 |
+
" 'd67d59079f4a4c10b80d571cb438de23',\n",
|
420 |
+
" 'd7158abe270d496d9985420c9b03a077',\n",
|
421 |
+
" '2fde1e21c14a4f90a113b395a3712743',\n",
|
422 |
+
" '2510f9f4fe8b4bb78bb880469169476a',\n",
|
423 |
+
" 'a4a1b06704044d4eadf8b2e6142e79c9',\n",
|
424 |
+
" '0a466939b2d54d5293770a7b1bfecda1',\n",
|
425 |
+
" '79aee364cb994e36b1adde5b8e5e3086',\n",
|
426 |
+
" '5592d4e1f1ec4307956ff1a56c7a9a95',\n",
|
427 |
+
" '3deee26006c8406da04f85d82ef7c52b',\n",
|
428 |
+
" '4f3bb845b65f4ff9b6dc44bc0e6ff645',\n",
|
429 |
+
" '63b8bbe1a5fc461dad75fa7aed4e1382',\n",
|
430 |
+
" 'bd833bdc626741e1bd3af21e90b598e0',\n",
|
431 |
+
" 'dfb14880e30d4dbf8caf0074d68d97c1',\n",
|
432 |
+
" 'f8d84d5b0edb487db8cd381569a5d79e',\n",
|
433 |
+
" 'b43b34c5ec3449bebe6eaec3ecf141b4',\n",
|
434 |
+
" '57c7120546b1405984982cfdc6198077',\n",
|
435 |
+
" '9a53cef8c1344f75ab49b328c11f98a0',\n",
|
436 |
+
" 'bf49b6b8d8594e7588b1b108007034b2',\n",
|
437 |
+
" 'c0b6a58b37444cc0b556cfeee6ca8b55',\n",
|
438 |
+
" 'a6a8db63ba4a4baf9d32f320c21c6313',\n",
|
439 |
+
" 'c61e63e1f9b34e3dae8129e3010d8706',\n",
|
440 |
+
" '6be3ec9a2fca469f9c9915d056cc8324',\n",
|
441 |
+
" '746efb41eb2c40a88aedb82e5b29dfca',\n",
|
442 |
+
" '46c5071d7e204ff888dce1ed7dfffff4',\n",
|
443 |
+
" '04b68080947d4c96b13f7e9eb428070d',\n",
|
444 |
+
" '66309b0484914b099cda9744b3bfae55',\n",
|
445 |
+
" '5fe570e4ef224d41aab64aab07f7ef18',\n",
|
446 |
+
" '8a698955d62d4505be78c0914e1ec7ab',\n",
|
447 |
+
" 'b1f0063f145e4c2f9900dc7ba64a092f',\n",
|
448 |
+
" 'f3ab3e53fb204ef7a6d30c6513e42bc3',\n",
|
449 |
+
" '03295952d1d241b3bc7ed1abd990e32a',\n",
|
450 |
+
" 'efd0faadabbe4dad9ae7af70fadee972',\n",
|
451 |
+
" 'e398133bbea24e48af9f8cab8f32310e',\n",
|
452 |
+
" 'db3e34ee20c043be804e1c1a9db594fb',\n",
|
453 |
+
" '991a44d7e9ab4ae486964e6d26034918',\n",
|
454 |
+
" '55910ec6f0f64a49b167553ff9a20f61',\n",
|
455 |
+
" '1d9d5538d4d34e3e992e3891ead9cae8',\n",
|
456 |
+
" '863753ca91b74fbebd46d2a3f0f0b7a6',\n",
|
457 |
+
" 'b13efd943e7947259e5f807b2229ccf3',\n",
|
458 |
+
" 'b0fddb4609a54fc8b0dcc6cab57161a9',\n",
|
459 |
+
" '0f943749f0d24d8e94fd1dc209b09b9c',\n",
|
460 |
+
" '10af8065bf0b4eb5aca05993ac546176',\n",
|
461 |
+
" '23ed6a4ca13343099d920007034f7d34',\n",
|
462 |
+
" 'ceb0455e738b47d7b3b7fd99506dbb65',\n",
|
463 |
+
" '940d178005994c83b17652f086e292d6',\n",
|
464 |
+
" '69ee5697b0b742dd86990299a8c8cefe',\n",
|
465 |
+
" 'ab48f4fad8d34410b9949d071abdfb42',\n",
|
466 |
+
" '135ba4ad88fb4300a560a7323535e37b',\n",
|
467 |
+
" '66c9e4f32ca8418d9922de33bf84dcd4',\n",
|
468 |
+
" 'c776e9698a424f8a9271f3035f02c041',\n",
|
469 |
+
" '7544393d08454b2191837910d05db972',\n",
|
470 |
+
" 'd9a62e36fdeb40799071931e14b52963',\n",
|
471 |
+
" 'dc66a111d00344b5af5963c0d118d37b',\n",
|
472 |
+
" '9d1f6a6c09a14334b3adc4799cfd8d7e',\n",
|
473 |
+
" '27587b7eec344848a4fd80724d74c916',\n",
|
474 |
+
" '502aed42f63440c791cd27e8c4d0ebeb',\n",
|
475 |
+
" '5f1b1c46c7b04b27a8821b2c26425cba',\n",
|
476 |
+
" 'cd51cdfa35934fe19059aed56da1e35b',\n",
|
477 |
+
" '6fe71508c191419db508a9958ccf94e3',\n",
|
478 |
+
" '3e33955d1f0c4729b92f5560bc9a5f48',\n",
|
479 |
+
" '5d6837486bae4495857be79cac8fbab6',\n",
|
480 |
+
" '7e526fdb9dd149b39718bbdf652fa10f',\n",
|
481 |
+
" '6a7504d312b34b70925f761ec113e06e',\n",
|
482 |
+
" 'cdbc28b9da1846d8a6a9209fc4052d9d',\n",
|
483 |
+
" 'd5e7d7ca7c6b46689385ef8ec9acf389',\n",
|
484 |
+
" 'c521c62ec85349208aef52f72a0ab0a3',\n",
|
485 |
+
" '5af3b7b361884f22b1ccd94b1bf56718',\n",
|
486 |
+
" '3da65a3dfebe4074bfee9df2216ac6a0',\n",
|
487 |
+
" '989436d80f11412786de32631a20164c',\n",
|
488 |
+
" 'b8934952cb644d6186db12fea1f36900',\n",
|
489 |
+
" '8961e893462e4750ace14ab8089730a2',\n",
|
490 |
+
" 'df6d995cc7a94f19bb82c56be27bcb8d',\n",
|
491 |
+
" '9802e6e00fb641dd901a90b7c101b578',\n",
|
492 |
+
" '2c77bc0c03d245ca8692ba2a05b9ff57',\n",
|
493 |
+
" '2c42f0f9da6d4ef9986bc069c1dde12d',\n",
|
494 |
+
" '6263199a92654cddbe70aaaaee82b0a3',\n",
|
495 |
+
" '20c172b306cc4262929dab4a7b734760',\n",
|
496 |
+
" '2961b6f93919473185e6c407801e4a2e',\n",
|
497 |
+
" 'f430faeec2984de0b12cb14d35200475',\n",
|
498 |
+
" '2455745f94554dd694969decf6232d67',\n",
|
499 |
+
" 'a8307d768d574b71a50e69aa3e906913',\n",
|
500 |
+
" 'a8af04cdc83b4bcca4b3334cc4f87a31',\n",
|
501 |
+
" 'e0105deb25104259922024363eb278f9',\n",
|
502 |
+
" '6dbba361217f4f65845ad1e58f52f66d',\n",
|
503 |
+
" 'd6b391d2232b439d861c85d81aeffd13',\n",
|
504 |
+
" 'f0357e35b61649968204afe0afcaa986',\n",
|
505 |
+
" '84b5f1ea889a438495be7f5286516439',\n",
|
506 |
+
" 'fefa48d67f004ae991280d670ff0525f',\n",
|
507 |
+
" 'b49d4c918add4577b2ba23a7cae95a2f',\n",
|
508 |
+
" '8227801e19af44e6bd5552ca958dc83d',\n",
|
509 |
+
" '011c11d3da5b45aa98eca658fe04934c',\n",
|
510 |
+
" '5e5348434bf94d50a57995b5e64ebf13',\n",
|
511 |
+
" '6c704ef7e4cf4ccb9e911e34c12a503a',\n",
|
512 |
+
" '739b2782f9ad4e8cb9b5157bdbc2f92d',\n",
|
513 |
+
" 'cdb3dbcb67fa4e4b84f2b89cc0fa35e0',\n",
|
514 |
+
" '1bea36f66ac64c95b895cec1540da580',\n",
|
515 |
+
" '8566fba4d0aa43b293cf64db66b53051',\n",
|
516 |
+
" 'c3ec323138fc42678d77c8c483d8e73a',\n",
|
517 |
+
" '008c74419bd544899d9d83212e531ba5',\n",
|
518 |
+
" '3c3237238c6b4bfe990734daa6344fb7',\n",
|
519 |
+
" '7a44fe6828c54c469b52a0d215e38135',\n",
|
520 |
+
" 'bbbc1bbb596d4ba28b78eee1908e57a4',\n",
|
521 |
+
" 'bde8eb1d0f314d728fd3f94cce68c5ae',\n",
|
522 |
+
" '39c307c0404141c1a51784fcce1834f2',\n",
|
523 |
+
" '3a82fd7dca36498495bb3b6400656bc6',\n",
|
524 |
+
" '1c790b01db7a4bfda6a59542322ce975',\n",
|
525 |
+
" 'f56b41ea938547eaac61edabd71e0cc2',\n",
|
526 |
+
" '55708b9971954a77a64440b4e2a4d437',\n",
|
527 |
+
" '6052cc5180aa43359948f92a2fba7fd2',\n",
|
528 |
+
" '2a9f782eb0b94d2381c2b902b89313db',\n",
|
529 |
+
" 'fcb302874996442296870bdff15b2d4f',\n",
|
530 |
+
" '0864ff8559dc43be94959f7493dd6067',\n",
|
531 |
+
" '7bc9df2622734502bfacdd235b66edd1',\n",
|
532 |
+
" 'a01170164ec84b7194848a9021586d99',\n",
|
533 |
+
" 'e2c7ea2f03cd4100bef06b31c15d5df6',\n",
|
534 |
+
" 'b85b7a4a8660444fa704ecef67e5978c']"
|
535 |
+
]
|
536 |
+
},
|
537 |
+
"execution_count": 5,
|
538 |
+
"metadata": {},
|
539 |
+
"output_type": "execute_result"
|
540 |
+
}
|
541 |
+
],
|
542 |
+
"source": [
|
543 |
+
"from langchain_qdrant import QdrantVectorStore\n",
|
544 |
+
"from langchain_core.documents import Document\n",
|
545 |
+
"from qdrant_client import QdrantClient\n",
|
546 |
+
"from qdrant_client.http.models import Distance, VectorParams\n",
|
547 |
+
"\n",
|
548 |
+
"dimension = 1024\n",
|
549 |
+
"collection_name = \"ai-safety-sr-arctic-embed-l-semantic\"\n",
|
550 |
+
"qdrant_server = \"https://500cb0e8-ea08-4662-b4f2-3eca11e635da.europe-west3-0.gcp.cloud.qdrant.io:6333\"\n",
|
551 |
+
"qdrant_client = QdrantClient(url=qdrant_server,api_key=os.environ[\"QDRANT_API_KEY\"])\n",
|
552 |
+
"qdrant_client.create_collection(\n",
|
553 |
+
" collection_name=collection_name,\n",
|
554 |
+
" vectors_config=VectorParams(size=dimension, distance=Distance.COSINE),\n",
|
555 |
+
")\n",
|
556 |
+
"\n",
|
557 |
+
"vector_store = QdrantVectorStore(\n",
|
558 |
+
" client=qdrant_client,\n",
|
559 |
+
" collection_name=collection_name,\n",
|
560 |
+
" embedding=embedding_model,\n",
|
561 |
+
")\n",
|
562 |
+
"\n",
|
563 |
+
"vector_store.add_documents(chunked_docs)"
|
564 |
+
]
|
565 |
+
},
|
566 |
+
{
|
567 |
+
"cell_type": "code",
|
568 |
+
"execution_count": null,
|
569 |
+
"metadata": {},
|
570 |
+
"outputs": [],
|
571 |
+
"source": []
|
572 |
+
}
|
573 |
+
],
|
574 |
+
"metadata": {
|
575 |
+
"kernelspec": {
|
576 |
+
"display_name": "venv",
|
577 |
+
"language": "python",
|
578 |
+
"name": "python3"
|
579 |
+
},
|
580 |
+
"language_info": {
|
581 |
+
"codemirror_mode": {
|
582 |
+
"name": "ipython",
|
583 |
+
"version": 3
|
584 |
+
},
|
585 |
+
"file_extension": ".py",
|
586 |
+
"mimetype": "text/x-python",
|
587 |
+
"name": "python",
|
588 |
+
"nbconvert_exporter": "python",
|
589 |
+
"pygments_lexer": "ipython3",
|
590 |
+
"version": "3.11.9"
|
591 |
+
}
|
592 |
+
},
|
593 |
+
"nbformat": 4,
|
594 |
+
"nbformat_minor": 2
|
595 |
+
}
|
requirements.txt
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
numpy==1.26.4
|
2 |
+
chainlit==0.7.700 # 1.1.402
|
3 |
+
openai==1.44.1
|
4 |
+
qdrant-client==1.11.2
|
5 |
+
langchain==0.3.0
|
6 |
+
langchain-text-splitters==0.3.0
|
7 |
+
langchain-community==0.3.0
|
8 |
+
langchain_experimental
|
9 |
+
langchain_qdrant
|
10 |
+
langchain_openai
|
11 |
+
pypdf==4.3.1
|
12 |
+
PyMuPDF==1.24.10
|
13 |
+
pymupdf4llm
|
14 |
+
sentence_transformers
|
15 |
+
langchain_huggingface
|