{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Synthetic data generation using Ragas framework" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "> Python packages are installed from `requirements.txt` file into virtual environment" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install -qU langsmith langchain-core langchain-community langchain-openai langchain-qdrant langchain_experimental pymupdf ragas" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "aiofiles==23.2.1\n", "aiohappyeyeballs==2.4.0\n", "aiohttp==3.10.5\n", "aiosignal==1.3.1\n", "annotated-types==0.7.0\n", "anyio==3.7.1\n", "appdirs==1.4.4\n", "appnope==0.1.4\n", "asttokens==2.4.1\n", "asyncer==0.0.2\n", "attrs==24.2.0\n", "bidict==0.23.1\n", "certifi==2024.8.30\n", "chainlit==0.7.700\n", "charset-normalizer==3.3.2\n", "click==8.1.7\n", "comm==0.2.2\n", "dataclasses-json==0.5.14\n", "datasets==3.0.0\n", "debugpy==1.8.5\n", "decorator==5.1.1\n", "Deprecated==1.2.14\n", "dill==0.3.8\n", "distro==1.9.0\n", "executing==2.1.0\n", "fastapi==0.100.1\n", "fastapi-socketio==0.0.10\n", "filelock==3.16.1\n", "filetype==1.2.0\n", "frozenlist==1.4.1\n", "fsspec==2024.6.1\n", "googleapis-common-protos==1.65.0\n", "grpcio==1.66.1\n", "grpcio-tools==1.62.3\n", "h11==0.14.0\n", "h2==4.1.0\n", "hpack==4.0.0\n", "httpcore==0.17.3\n", "httpx==0.24.1\n", "huggingface-hub==0.25.0\n", "hyperframe==6.0.1\n", "idna==3.10\n", "importlib_metadata==8.4.0\n", "ipykernel==6.29.5\n", "ipython==8.27.0\n", "jedi==0.19.1\n", "Jinja2==3.1.4\n", "jiter==0.5.0\n", "joblib==1.4.2\n", "jsonpatch==1.33\n", "jsonpointer==3.0.0\n", "jupyter_client==8.6.3\n", "jupyter_core==5.7.2\n", "langchain==0.3.0\n", "langchain-community==0.3.0\n", "langchain-core==0.3.5\n", "langchain-experimental==0.3.0\n", "langchain-huggingface==0.1.0\n", "langchain-openai==0.2.0\n", 
"langchain-qdrant==0.1.4\n", "langchain-text-splitters==0.3.0\n", "langsmith==0.1.125\n", "Lazify==0.4.0\n", "MarkupSafe==2.1.5\n", "marshmallow==3.22.0\n", "matplotlib-inline==0.1.7\n", "mpmath==1.3.0\n", "multidict==6.1.0\n", "multiprocess==0.70.16\n", "mypy-extensions==1.0.0\n", "nest-asyncio==1.6.0\n", "networkx==3.3\n", "numpy==1.26.4\n", "openai==1.44.1\n", "opentelemetry-api==1.27.0\n", "opentelemetry-exporter-otlp==1.27.0\n", "opentelemetry-exporter-otlp-proto-common==1.27.0\n", "opentelemetry-exporter-otlp-proto-grpc==1.27.0\n", "opentelemetry-exporter-otlp-proto-http==1.27.0\n", "opentelemetry-instrumentation==0.48b0\n", "opentelemetry-proto==1.27.0\n", "opentelemetry-sdk==1.27.0\n", "opentelemetry-semantic-conventions==0.48b0\n", "orjson==3.10.7\n", "packaging==23.2\n", "pandas==2.2.3\n", "parso==0.8.4\n", "pexpect==4.9.0\n", "pillow==10.4.0\n", "platformdirs==4.3.6\n", "portalocker==2.10.1\n", "prompt_toolkit==3.0.47\n", "protobuf==4.25.5\n", "psutil==6.0.0\n", "ptyprocess==0.7.0\n", "pure_eval==0.2.3\n", "pyarrow==17.0.0\n", "pydantic==2.9.2\n", "pydantic-settings==2.5.2\n", "pydantic_core==2.23.4\n", "Pygments==2.18.0\n", "PyJWT==2.9.0\n", "PyMuPDF==1.24.10\n", "pymupdf4llm==0.0.17\n", "PyMuPDFb==1.24.10\n", "pypdf==4.3.1\n", "pysbd==0.3.4\n", "python-dateutil==2.9.0.post0\n", "python-dotenv==1.0.1\n", "python-engineio==4.9.1\n", "python-graphql-client==0.4.3\n", "python-multipart==0.0.6\n", "python-socketio==5.11.4\n", "pytz==2024.2\n", "PyYAML==6.0.2\n", "pyzmq==26.2.0\n", "qdrant-client==1.11.2\n", "ragas==0.1.19\n", "regex==2024.9.11\n", "requests==2.32.3\n", "safetensors==0.4.5\n", "scikit-learn==1.5.2\n", "scipy==1.14.1\n", "sentence-transformers==3.1.1\n", "simple-websocket==1.0.0\n", "six==1.16.0\n", "sniffio==1.3.1\n", "SQLAlchemy==2.0.35\n", "stack-data==0.6.3\n", "starlette==0.27.0\n", "sympy==1.13.3\n", "syncer==2.0.3\n", "tenacity==8.5.0\n", "threadpoolctl==3.5.0\n", "tiktoken==0.7.0\n", "tokenizers==0.19.1\n", "tomli==2.0.1\n", 
"torch==2.4.1\n", "tornado==6.4.1\n", "tqdm==4.66.5\n", "traitlets==5.14.3\n", "transformers==4.44.2\n", "typing-inspect==0.9.0\n", "typing_extensions==4.12.2\n", "tzdata==2024.1\n", "uptrace==1.26.0\n", "urllib3==2.2.3\n", "uvicorn==0.23.2\n", "watchfiles==0.20.0\n", "wcwidth==0.2.13\n", "websockets==13.1\n", "wrapt==1.16.0\n", "wsproto==1.2.0\n", "xxhash==3.5.0\n", "yarl==1.11.1\n", "zipp==3.20.2\n" ] } ], "source": [ "!pip freeze\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "import getpass\n", "from uuid import uuid4\n", "\n", "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"LangChain API Key:\")\n", "\n", "os.environ[\"LANGCHAIN_PROJECT\"] = \"AIM-SDG-MidTerm - AI Safety\"\n", "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n", "\n", "os.environ[\"QDRANT_API_KEY\"] = getpass.getpass(\"Enter Your Qdrant API Key: \")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from pdfloader import PDFLoaderWrapper\n", "from langchain_experimental.text_splitter import SemanticChunker\n", "\n", "BOR_FILE_PATH = \"https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf\"\n", "NIST_FILE_PATH = \"https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf\"\n", "SMALL_DOC = \"https://arxiv.org/pdf/1908.10084\" \n", "documents_to_preload = [\n", " BOR_FILE_PATH,\n", " NIST_FILE_PATH\n", " # SMALL_DOC\n", "]\n", "\n", "pdf_loader = PDFLoaderWrapper(\n", " documents_to_preload, PDFLoaderWrapper.LoaderType.PYMUPDF\n", ")\n", "documents = await pdf_loader.aload()\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print (\"Importing packages\")\n", "from ragas.testset.generator import TestsetGenerator\n", "from ragas.testset.evolutions import simple, reasoning, multi_context\n", "from langchain_openai 
import ChatOpenAI, OpenAIEmbeddings\n",
    "from ragas.testset.docstore import Document, DocumentStore,InMemoryDocumentStore\n",
    "from langchain_experimental.text_splitter import SemanticChunker\n",
    "from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline\n",
    "from ragas.testset.extractor import KeyphraseExtractor\n",
    "from ragas.llms import LangchainLLMWrapper\n",
    "from ragas.embeddings.base import LangchainEmbeddingsWrapper\n",
    "\n",
    "print (\"Packages import complete\")\n",
    "print (\"Getting the Embedding model from Huggingface\")\n",
    "# Using best performing embedding model from hugging face to generate quality dataset.\n",
    "# Need GPU\n",
    "model_name = \"Snowflake/snowflake-arctic-embed-l\"\n",
    "embedding_model = HuggingFaceEmbeddings(model_name=model_name)\n",
    "print (\"Embedding model loaded\")\n",
    "\n",
    "print (\"Splitting the documents into semantic chunks\")\n",
    "text_splitter = SemanticChunker(embedding_model, breakpoint_threshold_type=\"percentile\",breakpoint_threshold_amount=90)\n",
    "# NOTE: chunked_docs is not passed to the generator; the generation cell is called with the raw `documents`.\n",
    "chunked_docs = text_splitter.split_documents(documents)\n",
    "\n",
    "print (\"Creating the document store for ragas and loading LLM models\")\n",
    "generator_llm = ChatOpenAI(model=\"gpt-4o-mini\")\n",
    "critic_llm = ChatOpenAI(model=\"gpt-4o\")\n",
    "\n",
    "# The extractor and the docstore are Ragas components constructed by hand, so they are\n",
    "# given Ragas-wrapped models. from_langchain() still receives the plain LangChain objects.\n",
    "keyphrase_extractor = KeyphraseExtractor(llm=LangchainLLMWrapper(generator_llm))\n",
    "docstore = InMemoryDocumentStore(\n",
    "    splitter=text_splitter,\n",
    "    extractor=keyphrase_extractor,\n",
    "    embeddings=LangchainEmbeddingsWrapper(embedding_model),\n",
    ")\n",
    "\n",
    "\n",
    "print (\"Creating the testset generator\")\n",
    "generator = TestsetGenerator.from_langchain( # Default uses TokenTextSplitter\n",
    "    generator_llm=generator_llm,\n",
    "    critic_llm=critic_llm,\n",
    "    embeddings=embedding_model,\n",
    "    docstore=docstore # Document store uses SemanticChunker\n",
    ")\n",
    "\n",
    "# Share of each question evolution type in the generated test set (sums to 1.0).\n",
    "distributions = {\n",
    "    simple: 0.5,\n",
    "    multi_context: 0.3,\n",
    "    reasoning: 0.2\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tests_per_doc = 2 \n",
    "test_size = tests_per_doc * len(documents)\n",
    "\n",
    "testset = generator.generate_with_langchain_docs(\n",
    " 
documents, \n",
    " test_size, \n",
    " distributions, \n",
    " with_debugging_logs=True\n",
    ") # Default RunConfig(max_retries=15, max_wait=90)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "testset.to_pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langsmith import Client\n",
    "\n",
    "client = Client()\n",
    "\n",
    "dataset_name = \"AI Safety\"\n",
    "\n",
    "dataset = client.create_dataset(\n",
    "    dataset_name=dataset_name,\n",
    "    description=\"Questions about AI Safety\"\n",
    ")\n",
    "\n",
    "# iterrows() yields (index, row) pairs; the generated fields live on the row, not the index.\n",
    "for _, row in testset.to_pandas().iterrows():\n",
    "    client.create_example(\n",
    "        inputs={\n",
    "            \"question\": row[\"question\"]\n",
    "        },\n",
    "        outputs={\n",
    "            \"answer\": row[\"ground_truth\"]\n",
    "        },\n",
    "        # Keep the test set's `contexts` column alongside the example.\n",
    "        metadata={\n",
    "            \"context\": list(row[\"contexts\"])\n",
    "        },\n",
    "        dataset_id=dataset.id\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create Rag chain to generate answers for above questions in the dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> Note that we are using Qdrant cloud where the pdf document is processed and saved for us to consume. For the RAG pipeline we use the same embedding model originally used to populate the Qdrant vectorstore."
] },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_qdrant import QdrantVectorStore\n",
    "from langchain_core.documents import Document\n",
    "from qdrant_client import QdrantClient\n",
    "from qdrant_client.http.models import Distance, VectorParams\n",
    "\n",
    "# Vector size configured for the collection; must match the embedding model's output size.\n",
    "dimension = 1024\n",
    "collection_name = \"ai-safety-sr-arctic-embed-l-semantic\"\n",
    "qdrant_server = \"https://500cb0e8-ea08-4662-b4f2-3eca11e635da.europe-west3-0.gcp.cloud.qdrant.io:6333\"\n",
    "qdrant_client = QdrantClient(url=qdrant_server,api_key=os.environ[\"QDRANT_API_KEY\"])\n",
    "\n",
    "# The collection is expected to be populated already, so only create it when it is\n",
    "# missing; creating it unconditionally fails when it already exists.\n",
    "if not qdrant_client.collection_exists(collection_name):\n",
    "    qdrant_client.create_collection(\n",
    "        collection_name=collection_name,\n",
    "        vectors_config=VectorParams(size=dimension, distance=Distance.COSINE),\n",
    "    )\n",
    "\n",
    "vector_store = QdrantVectorStore(\n",
    "    client=qdrant_client,\n",
    "    collection_name=collection_name,\n",
    "    embedding=embedding_model,\n",
    ")\n",
    "\n",
    "retriever = vector_store.as_retriever()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.prompts import ChatPromptTemplate\n",
    "\n",
    "RAG_PROMPT = \"\"\"\\\n",
    "Given a provided context and question, you must answer the question based only on context.\n",
    "\n",
    "If you cannot answer the question based on the context - you must say \"I don't know\".\n",
    "\n",
    "Context: {context}\n",
    "Question: {question}\n",
    "\"\"\"\n",
    "\n",
    "rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "# Using the same model used in the app.\n",
    "chat_model_name = \"gpt-4o\"\n",
    "llm = ChatOpenAI(model=chat_model_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from operator import itemgetter\n",
    "from langchain_core.runnables import RunnablePassthrough, RunnableParallel\n",
    "from langchain.schema import 
StrOutputParser\n",
    "\n",
    "ai_safety_rag_chain = (\n",
    " {\"context\": itemgetter(\"question\") | retriever, \"question\": itemgetter(\"question\")}\n",
    " | rag_prompt | llm | StrOutputParser()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ai_safety_rag_chain.invoke({\"question\" : \"What steps can organizations take to minimize bias in AI models?\"})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LangSmith Evaluation setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langsmith.evaluation import LangChainStringEvaluator, evaluate\n",
    "\n",
    "eval_llm = ChatOpenAI(model=\"gpt-4o\")\n",
    "\n",
    "\n",
    "def prepare_eval_data(run, example):\n",
    "    \"\"\"Map a LangSmith run and its dataset example onto string-evaluator inputs.\n",
    "\n",
    "    Returns the run's `output` as the prediction, the example's `answer` as the\n",
    "    reference, and the example's `question` as the input.\n",
    "    \"\"\"\n",
    "    return {\n",
    "        \"prediction\": run.outputs[\"output\"],\n",
    "        \"reference\": example.outputs[\"answer\"],\n",
    "        \"input\": example.inputs[\"question\"],\n",
    "    }\n",
    "\n",
    "\n",
    "def make_criteria_evaluator(name, description):\n",
    "    \"\"\"Build a single-criterion evaluator graded by `eval_llm`.\n",
    "\n",
    "    `labeled_criteria` is used (rather than the reference-free `criteria`) so that\n",
    "    the reference returned by `prepare_eval_data` is taken into account.\n",
    "    \"\"\"\n",
    "    return LangChainStringEvaluator(\n",
    "        \"labeled_criteria\",\n",
    "        config={\"criteria\": {name: description}, \"llm\": eval_llm},\n",
    "        prepare_data=prepare_eval_data,\n",
    "    )\n",
    "\n",
    "\n",
    "# Evaluators\n",
    "qa_evaluator = LangChainStringEvaluator(\"qa\", config={\"llm\" : eval_llm})\n",
    "\n",
    "# NOTE: the retrieved context is not passed to the evaluators below; the dataset's\n",
    "# ground-truth answer is supplied as the reference.\n",
    "\n",
    "# Faithfulness Evaluator\n",
    "# Checks whether the generated answer is faithful to the provided source material or context.\n",
    "faithfulness_evaluator = make_criteria_evaluator(\n",
    "    \"faithfulness\",\n",
    "    \"Is the answer faithful to the given context?\",\n",
    ")\n",
    "\n",
    "# Answer Relevancy Evaluator\n",
    "# Determines whether the answer is relevant to the user's question.\n",
    "answer_relevancy_evaluator = make_criteria_evaluator(\n",
    "    \"relevancy\",\n",
    "    \"Does the answer address the question and provide relevant information?\",\n",
    ")\n",
    "\n",
    "# Context Precision Evaluator\n",
    "# Evaluates how precisely the answer uses information from the given context.\n",
    "context_precision_evaluator = make_criteria_evaluator(\n",
    "    \"context_precision\",\n",
    "    \"Does the answer precisely use information from the provided context?\",\n",
    ")\n",
    "\n",
    "# Context Recall Evaluator\n",
    "# Determines if the answer recalls all the necessary and relevant information from the context.\n",
    "context_recall_evaluator = make_criteria_evaluator(\n",
    "    \"context_recall\",\n",
    "    \"Does the answer recall all relevant information from the provided context?\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "evaluate(\n",
    " ai_safety_rag_chain.invoke,\n",
    " data=dataset_name,\n",
    " evaluators=[\n",
    " qa_evaluator,\n",
    " faithfulness_evaluator,\n",
    " answer_relevancy_evaluator,\n",
    " context_precision_evaluator,\n",
    " context_recall_evaluator\n",
    " ],\n",
    " metadata={\"revision_id\": \"ai_safety_rag_chain\"},\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
"nbformat_minor": 2 }