ai-safety-chatty / pdfloader.py
jeevan
recommit
bc453aa
raw
history blame
973 Bytes
from enum import Enum
from typing import List
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_core.documents import Document
import asyncio
class PDFLoaderWrapper():
class LoaderType(str, Enum):
PYMUPDF = "pymupdf"
def __init__(self, file_path: str | List[str] , loader_type: LoaderType = LoaderType.PYMUPDF):
self.file_path = file_path if isinstance(file_path, list) else [file_path]
self.loader_type = loader_type
async def aload(self) -> List[Document]:
all_docs = []
for file_path in self.file_path:
if self.loader_type == self.LoaderType.PYMUPDF:
try:
loader = PyMuPDFLoader(file_path)
docs = await loader.aload()
all_docs.extend(docs)
except Exception as e:
print(f"Error loading file {file_path}: {e}")
continue
return all_docs