In [None]:
!pip install -qU langsmith langchain-core langchain-community langchain-openai langchain-qdrant langchain_experimental pymupdf ragas

In [None]:
import os
import getpass
from uuid import uuid4

# LangSmith tracing configuration.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangChain API Key:")

os.environ["LANGCHAIN_PROJECT"] = "AIM-SDG-MidTerm - AI Safety"
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

os.environ["QDRANT_API_KEY"] = getpass.getpass("Enter Your Qdrant API Key: ")

# create_history_aware_rag_chain_with_params reads os.environ["QDRANT_URL"], which
# was never set anywhere in the notebook. Default it to the cluster used by the
# vector-store cell; an already-exported QDRANT_URL takes precedence.
os.environ.setdefault(
    "QDRANT_URL",
    "https://500cb0e8-ea08-4662-b4f2-3eca11e635da.europe-west3-0.gcp.cloud.qdrant.io:6333",
)

# Synthetic data generation using Ragas framework

We will generate a set of synthetic data for evaluating different options:
1. Evaluating Embedding model
2. Evaluating Chunking Strategies

In [None]:
from langchain_experimental.text_splitter import SemanticChunker

from enum import Enum
from typing import List
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_core.documents import Document
import asyncio

class PDFLoaderWrapper():
    """Load one or more PDF locations into a flat list of LangChain documents."""

    class LoaderType(str, Enum):
        """Available loader backends (only PyMuPDF is implemented)."""
        PYMUPDF = "pymupdf"

    def __init__(self, file_path: str | List[str] , loader_type: LoaderType = LoaderType.PYMUPDF):
        # Always store a list so aload() can iterate uniformly.
        if isinstance(file_path, list):
            self.file_path = file_path
        else:
            self.file_path = [file_path]
        self.loader_type = loader_type

    async def aload(self) -> List[Document]:
        """Load every configured file and return all resulting documents.

        A file that fails to load is reported with print() and skipped, so the
        result may be shorter than expected (or empty) rather than raising.
        """
        loaded = []
        for path in self.file_path:
            # Only the PyMuPDF backend exists; any other loader type yields nothing.
            if self.loader_type != self.LoaderType.PYMUPDF:
                continue
            try:
                loaded.extend(await PyMuPDFLoader(path).aload())
            except Exception as exc:
                print(f"Error loading file {path}: {exc}")
        return loaded

BOR_FILE_PATH = "https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf"
NIST_FILE_PATH = "https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf"
documents = [
 BOR_FILE_PATH,
 NIST_FILE_PATH
]

pdf_loader = PDFLoaderWrapper(
 documents, PDFLoaderWrapper.LoaderType.PYMUPDF
)
documents = await pdf_loader.aload()



In [None]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# NOTE: this `Document` is the ragas one and shadows langchain_core's Document
# until a later cell re-imports it.
from ragas.testset.docstore import Document, DocumentStore,InMemoryDocumentStore
from langchain_experimental.text_splitter import SemanticChunker
# RecursiveCharacterTextSplitter is not exported by langchain_experimental;
# import it from langchain.text_splitter, as the evaluation cells below do.
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from ragas.testset.extractor import KeyphraseExtractor

print ("Packages import complete")
print ("Getting the Embedding model from Huggingface")
# Using best performing embedding model from hugging face to generate quality dataset.
# Need GPU
model_name = "Snowflake/snowflake-arctic-embed-l"
embedding_model = HuggingFaceEmbeddings(model_name=model_name)
print ("Embedding model loaded")

# The splitter is a RecursiveCharacterTextSplitter (not a SemanticChunker), so the
# progress message now says so.
print ("Splitting the documents into recursive character chunks")
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1024,
    chunk_overlap = 100,
    length_function = len,
)
# Not passed to the generator: the docstore below re-splits the raw documents
# with the same splitter.
chunked_docs = text_splitter.split_documents(documents)

print ("Creating the document store for ragas and loading LLM models")
generator_llm = ChatOpenAI(model="gpt-4o")
critic_llm = ChatOpenAI(model="gpt-4o")

keyphrase_extractor = KeyphraseExtractor(llm=generator_llm)
docstore = InMemoryDocumentStore(splitter=text_splitter,extractor=keyphrase_extractor, embeddings=embedding_model)

print ("Creating the testset generator")
generator = TestsetGenerator.from_langchain( # Default uses TokenTextSplitter
    generator_llm=generator_llm,
    critic_llm=critic_llm,
    embeddings=embedding_model,
    docstore=docstore # Custom docstore built on the RecursiveCharacterTextSplitter above
)

# Share of each question type in the generated test set (sums to 1.0).
distributions = {
    simple: 0.5,
    multi_context: 0.3,
    reasoning: 0.2
}

In [None]:
# Number of synthetic question / ground-truth samples to request.
test_size = 50

# Generate the test set from the loaded PDF pages, using the question-type mix
# in `distributions`; debugging logs are enabled for the run.
testset = generator.generate_with_langchain_docs(
 documents, 
 test_size, 
 distributions, 
 with_debugging_logs=True
) # Default RunConfig(max_retries=15, max_wait=90)

In [None]:
# Convert the generated test set to a DataFrame and display it for inspection.
testset_df = testset.to_pandas()
testset_df

In [None]:
# Persist the synthetic dataset so the evaluation can be repeated without regenerating it.
testset_df.to_csv('task5-ai-safety-sdg.csv', index=False)

# Plain Python lists of questions and reference answers for the evaluation loops.
test_questions = testset_df["question"].tolist()
test_groundtruths = testset_df["ground_truth"].tolist()

## Create Rag chain to generate answers for above questions in the dataset

> Note that we are using Qdrant Cloud, where the PDF documents have already been processed and stored for us to consume. For the RAG pipeline we use the same embedding model that was originally used to populate the Qdrant vector store.

In [None]:
from langchain_qdrant import QdrantVectorStore
from langchain_core.documents import Document
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# Vector size configured for the collection; it must match the embedding model's output size.
dimension = 1024
collection_name = "ai-safety-sr-arctic-embed-l-recursive"

# Prefer QDRANT_URL when it is set (the helper function further down reads the
# same variable); otherwise fall back to the cluster this notebook was written against.
qdrant_server = os.environ.get(
    "QDRANT_URL",
    "https://500cb0e8-ea08-4662-b4f2-3eca11e635da.europe-west3-0.gcp.cloud.qdrant.io:6333",
)
qdrant_client = QdrantClient(url=qdrant_server,api_key=os.environ["QDRANT_API_KEY"])

# The collection is expected to be pre-populated in Qdrant Cloud, and an
# unconditional create_collection() fails when it already exists (and on every
# re-run of this cell). Create it only when it is missing.
if not qdrant_client.collection_exists(collection_name):
    qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=dimension, distance=Distance.COSINE),
    )

vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name=collection_name,
    embedding=embedding_model,
)

retriever = vector_store.as_retriever()

In [None]:
from langchain.prompts import ChatPromptTemplate

# Prompt that restricts the model to the retrieved context and asks for an
# explicit "I don't know" when the context is insufficient.
# Template variables: {context} (retrieved documents) and {question}.
RAG_PROMPT = """\
Given a provided context and question, you must answer the question based only on context.

If you cannot answer the question based on the context - you must say "I don't know".

Context: {context}
Question: {question}
"""

# Chat prompt built from the template above.
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

In [None]:
from langchain_openai import ChatOpenAI

# Using the same model used in the app.
# This chat model produces the final answer in the RAG chain defined below.
chat_model_name = "gpt-4o"
llm = ChatOpenAI(model=chat_model_name)

In [None]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain.schema import StrOutputParser

# Simple RAG chain: the incoming {"question": ...} is fanned out to the retriever
# (as context) and passed through unchanged (as question), then rendered into
# the prompt, answered by the LLM and reduced to a plain string.
rag_chain_generate_anwsers = (
    RunnableParallel(
        context=itemgetter("question") | retriever,
        question=itemgetter("question"),
    )
    | rag_prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Smoke test: run the chain on one sample question and display the answer.
rag_chain_generate_anwsers.invoke({"question" : "What steps can organizations take to minimize bias in AI models?"})

# Create Rag Chain with config 

We are going to replicate the exact implementation used in the hosted RAG app but with different configuration to evaluate and compare.

In [None]:
# Utility function to create a rag chain with config
from langchain_experimental.text_splitter import SemanticChunker
from enum import Enum
from typing import List
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_core.documents import Document
import asyncio
from langchain_qdrant import QdrantVectorStore
from langchain_core.documents import Document
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain.schema import StrOutputParser
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import MessagesPlaceholder
from langchain.prompts import ChatPromptTemplate
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.retrieval import create_retrieval_chain
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory

# Source PDFs indexed by every RAG configuration under evaluation.
BOR_FILE_PATH = "https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf"
NIST_FILE_PATH = "https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf"
documents_to_preload = [BOR_FILE_PATH, NIST_FILE_PATH]

# session_id -> chat history; read and filled by get_session_history().
store = dict()

class PDFLoaderWrapper():
    """Thin async wrapper that loads a list of PDF locations with PyMuPDF.

    NOTE(review): this repeats the class defined in an earlier cell; the later
    definition is the one in effect once this cell has run.
    """

    class LoaderType(str, Enum):
        PYMUPDF = "pymupdf"

    def __init__(self, file_path: str | List[str] , loader_type: LoaderType = LoaderType.PYMUPDF):
        # A single path is wrapped so that aload() always iterates over a list.
        self.file_path = [file_path] if not isinstance(file_path, list) else file_path
        self.loader_type = loader_type

    async def _load_one(self, path: str) -> List[Document]:
        """Load one file; on any error, report it with print() and return []."""
        try:
            return await PyMuPDFLoader(path).aload()
        except Exception as err:
            print(f"Error loading file {path}: {err}")
            return []

    async def aload(self) -> List[Document]:
        """Return the documents of all files, skipping files that failed to load."""
        collected = []
        for path in self.file_path:
            # Files are only loaded for the PYMUPDF loader type.
            if self.loader_type == self.LoaderType.PYMUPDF:
                collected.extend(await self._load_one(path))
        return collected

async def get_contextual_compressed_retriever(retriver):
    """Wrap a retriever with LLM-based contextual compression.

    Args:
        retriver: Base retriever whose results are passed to the compressor.

    Returns:
        A ContextualCompressionRetriever combining the base retriever with an
        LLMChainExtractor driven by gpt-4o (temperature 0, max_tokens 1500).
    """
    extractor = LLMChainExtractor.from_llm(
        ChatOpenAI(temperature=0, model_name="gpt-4o", max_tokens=1500)
    )
    return ContextualCompressionRetriever(
        base_compressor=extractor,
        base_retriever=retriver,
    )

def create_history_aware_retriever_self(chat_model, retriever):
    """Build a retriever that first rewrites the question using the chat history.

    Args:
        chat_model: Chat model used to reformulate the latest user question.
        retriever: Retriever queried with the reformulated question.

    Returns:
        The runnable produced by create_history_aware_retriever.
    """
    system_instruction = (
        "Given a chat history and the latest user question which might reference context in the chat history, "
        "formulate a standalone question which can be understood without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    # system instruction -> prior turns -> latest user input
    prompt_messages = [
        ("system", system_instruction),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
    rewrite_prompt = ChatPromptTemplate.from_messages(prompt_messages)
    return create_history_aware_retriever(chat_model, retriever, rewrite_prompt)

def create_qa_chain(chat_model):
    """Build the answer-generation chain that stuffs retrieved documents into the prompt.

    Args:
        chat_model: Chat model that produces the final answer.

    Returns:
        The runnable produced by create_stuff_documents_chain; it expects
        "context", "chat_history" and "input" in its input.
    """
    # Adjacent string literals are concatenated verbatim, so each sentence must
    # carry its own trailing space; previously the prompt read
    # "...given context.Use the following..." and "...the question.If you don't...".
    qa_system_prompt = (
        "You are a helpful assistant named 'Shield' and your task is to answer any questions related to AI Safety for the given context. "
        "Use the following pieces of retrieved context to answer the question. "
        "If you don't know the answer, just say that you don't know.\n\n"
        "{context}"
    )
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", qa_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    return create_stuff_documents_chain(chat_model, qa_prompt)

def create_conversational_rag_chain(chat_model, retriever):
    """Combine history-aware retrieval with the QA chain into one retrieval chain.

    Args:
        chat_model: Chat model used for both question rewriting and answering.
        retriever: Retriever that supplies context documents.

    Returns:
        The runnable produced by create_retrieval_chain.
    """
    return create_retrieval_chain(
        create_history_aware_retriever_self(chat_model, retriever),
        create_qa_chain(chat_model),
    )

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for `session_id`, creating an empty one on first use.

    Histories live in the module-level `store` dict.
    """
    try:
        return store[session_id]
    except KeyError:
        history = ChatMessageHistory()
        store[session_id] = history
        return history


pdf_loader = PDFLoaderWrapper(
 documents_to_preload, PDFLoaderWrapper.LoaderType.PYMUPDF
)
documents = await pdf_loader.aload()

async def create_history_aware_rag_chain_with_params(
    huggingface_embedding,
    text_splitter,
    collection_name,
    compress: bool = False,
    conversational: bool = False,
):
    """Index the preloaded documents in Qdrant and build a RAG chain over them.

    Reads the module-level `documents`, and QDRANT_URL / QDRANT_API_KEY from the
    environment.

    Args:
        huggingface_embedding: Embedding model used to index and query the collection.
        text_splitter: Splitter applied to `documents` before indexing.
        collection_name: Qdrant collection to create and fill. Creation fails if
            the collection already exists, so delete it first.
        compress: When True, wrap the retriever with LLM contextual compression.
        conversational: When True, build the history-aware chain wrapped in
            RunnableWithMessageHistory; otherwise build a single-turn chain.

    Returns:
        (chain, retriever). Both chain variants accept {"input": question} and
        return a dict containing "answer" (str) and "context" (retrieved
        documents). `retriever` is the one the chain actually uses: the
        compressed retriever when `compress` is True, else the base one.
    """
    chunked_docs = text_splitter.split_documents(documents)
    dimension = 1024  # must match the embedding model's vector size
    qdrant_server = os.environ["QDRANT_URL"]
    qdrant_client = QdrantClient(url=qdrant_server, api_key=os.environ["QDRANT_API_KEY"])

    # Below fails if collection already exists so make sure to delete the collection first
    qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=dimension, distance=Distance.COSINE),
    )

    vector_store = QdrantVectorStore(
        client=qdrant_client,
        collection_name=collection_name,
        embedding=huggingface_embedding,
    )
    vector_store.add_documents(chunked_docs)

    retriever = vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={'k': 10, 'score_threshold': 0.8},
    )

    # Using the same model used in the app.
    chat_model_name = "gpt-4o"
    llm = ChatOpenAI(model=chat_model_name, temperature=0)

    # Previously the compressed retriever and both chain variables were bound in
    # only one branch each but read unconditionally, which raised
    # UnboundLocalError for every flag combination except
    # compress=True + conversational=True. Track one retriever and one chain.
    active_retriever = retriever
    if compress:
        active_retriever = await get_contextual_compressed_retriever(retriever)

    if conversational:
        history_ai_safety_rag_chain = create_conversational_rag_chain(llm, active_retriever)
        chain = RunnableWithMessageHistory(
            history_ai_safety_rag_chain,
            get_session_history,
            input_messages_key="input",
            history_messages_key="chat_history",
            output_messages_key="answer",
        )
    else:
        RAG_PROMPT = (
            "Given a provided context and question, you must answer the question based only on context.\n"
            "\n"
            "If you cannot answer the question based on the context - you must say \"I don't know\".\n"
            "\n"
            "Context: {context}\n"
            "Question: {question}\n"
        )
        rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
        # Callers invoke with {"input": ...}, so both context and question are read
        # from "input" (the old chain looked up a non-existent "question" key).
        # The answer is added next to the retrieved context so the output has the
        # same "answer" / "context" keys as the conversational chain.
        chain = (
            {"context": itemgetter("input") | active_retriever, "question": itemgetter("input")}
            | RunnablePassthrough.assign(answer=rag_prompt | llm | StrOutputParser())
        )

    return chain, active_retriever



# RAGAS Evaluation for Embedding Model

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load the embedding models
base_embedding_model = HuggingFaceEmbeddings(model_name="Snowflake/snowflake-arctic-embed-l")
fine_tuned_embedding_model = HuggingFaceEmbeddings(model_name="")

# Common splitter to keep variables minimum
recursive_text_splitter = RecursiveCharacterTextSplitter(
 chunk_size = 1024,
 chunk_overlap = 100,
 length_function = len,
)
recursive_chunked_docs = recursive_text_splitter.split_documents(documents)

# Create two rag chaings with different embeddings
rag_chain_base, retriever_base = await create_history_aware_rag_chain_with_params(
 base_embedding_model,
 recursive_text_splitter,
 "aichatty-snowflake-arctic-embed-l-recursive-base"
)

rag_chain_fine_tuned, retriever_fine_tuned = await create_history_aware_rag_chain_with_params(
 fine_tuned_embedding_model,
 recursive_text_splitter,
 "aichatty-snowflake-arctic-embed-l-recursive-ft"
)


In [None]:
# First Base model chain

from datasets import Dataset
import time
import uuid
from ragas import evaluate
from ragas.metrics import (
 faithfulness,
 answer_relevancy,
 answer_correctness,
 context_recall,
 context_precision,
)

# Collect one answer and its retrieved contexts per synthetic question.
answers, contexts = [], []

for question in test_questions:
    # Fresh history store and session id per question, so no chat history
    # carries over between questions.
    store = {}
    session_id = str(uuid.uuid4())
    run_config = {"configurable": {"session_id": session_id}}

    response = rag_chain_base.invoke({"input" : question}, config=run_config)
    answers.append(response["answer"])
    contexts.append([doc.page_content for doc in response["context"]])

# Dataset passed to ragas.evaluate.
base_chain_response_dataset = Dataset.from_dict(
    dict(
        question=test_questions,
        answer=answers,
        contexts=contexts,
        ground_truth=test_groundtruths,
    )
)

# Metrics shared by the base and fine-tuned evaluations.
metrics = [
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
    answer_correctness,
]
base_chain_eval_results = evaluate(base_chain_response_dataset, metrics)

base_chain_eval_results_df = base_chain_eval_results.to_pandas()
base_chain_eval_results_df

In [None]:
# For Fine Tuned model chain
# Same procedure as for the base chain, using rag_chain_fine_tuned.

answers, contexts = [], []

for question in test_questions:
    # Fresh history store and session id per question.
    store = {}
    session_id = str(uuid.uuid4())
    run_config = {"configurable": {"session_id": session_id}}

    response = rag_chain_fine_tuned.invoke({"input" : question}, config=run_config)
    answers.append(response["answer"])
    contexts.append([doc.page_content for doc in response["context"]])

ft_chain_response_dataset = Dataset.from_dict(
    dict(
        question=test_questions,
        answer=answers,
        contexts=contexts,
        ground_truth=test_groundtruths,
    )
)

# `metrics` is the list defined in the base-chain evaluation cell.
ft_chain_eval_results = evaluate(ft_chain_response_dataset, metrics)

ft_chain_eval_results_df = ft_chain_eval_results.to_pandas()
ft_chain_eval_results_df

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Inputs: base_chain_eval_results_df and ft_chain_eval_results_df from the cells above.

# Align both result sets on the question text; overlapping metric columns get
# the _base / _finetuned suffixes.
merged_df = pd.merge(
    base_chain_eval_results_df,
    ft_chain_eval_results_df,
    on='question',
    suffixes=('_base', '_finetuned'),
)

# Per-question gain in answer correctness (positive = fine-tuned model is better).
merged_df['improvement_answer_correctness'] = (
    merged_df['answer_correctness_finetuned'] - merged_df['answer_correctness_base']
)

# Figure 1: answer correctness of both models, question by question.
fig_compare, ax_compare = plt.subplots(figsize=(10, 6))
ax_compare.plot(merged_df['question'], merged_df['answer_correctness_base'], label='Base Model', marker='o')
ax_compare.plot(merged_df['question'], merged_df['answer_correctness_finetuned'], label='Fine-tuned Model', marker='x')
ax_compare.set_xlabel('Questions')
ax_compare.set_ylabel('Answer Correctness')
ax_compare.set_title('Comparison of Base Model and Fine-tuned Model on Answer Correctness')
plt.setp(ax_compare.get_xticklabels(), rotation=90)
ax_compare.legend()
fig_compare.tight_layout()
plt.show()

# Figure 2: improvement per question.
fig_gain, ax_gain = plt.subplots(figsize=(10, 6))
ax_gain.bar(merged_df['question'], merged_df['improvement_answer_correctness'], color='green')
ax_gain.set_xlabel('Questions')
ax_gain.set_ylabel('Improvement in Answer Correctness')
ax_gain.set_title('Improvement in Answer Correctness after Fine-tuning')
plt.setp(ax_gain.get_xticklabels(), rotation=90)
fig_gain.tight_layout()
plt.show()


# RAGAS Evaluation for chunking strategy

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Using fine_tuned_embedding_model from before

# First Splitter RecursiveCharacterTextSplitter
recursive_text_splitter = RecursiveCharacterTextSplitter(
 chunk_size = 1024,
 chunk_overlap = 100,
 length_function = len,
)
recursive_chunked_docs = recursive_text_splitter.split_documents(documents)

# Create two rag chaings with different embeddings
conversational_rag_chain_base, contextual_compressed_retriever_base, retriever_base = await create_history_aware_rag_chain_with_params(
 base_embedding_model,recursive_text_splitter,"aichatty-snowflake-arctic-embed-l-recursive-base"
)

conversational_rag_chain_fine_tuned, contextual_compressed_retriever_fine_tuned, retriever_fine_tuned = await create_history_aware_rag_chain_with_params(
 fine_tuned_embedding_model,recursive_text_splitter,"aichatty-snowflake-arctic-embed-l-recursive-ft"
)
