Spaces:
Paused
Paused
from enum import Enum | |
from typing import List | |
from langchain_community.document_loaders import PyMuPDFLoader | |
from langchain_core.documents import Document | |
import asyncio | |
class PDFLoaderWrapper():
    """Load one or more PDF files into LangChain ``Document`` objects.

    Loading is best-effort: a file that fails to load is reported on stdout
    and skipped, so a single bad file does not abort the whole batch.
    """

    class LoaderType(str, Enum):
        """Backends that can be used to read a PDF."""

        PYMUPDF = "pymupdf"

    def __init__(
        self,
        file_path: str | List[str],
        loader_type: LoaderType = LoaderType.PYMUPDF,
    ):
        """Remember which files to load and which backend to use.

        Args:
            file_path: A single path, or a list/tuple of paths.
            loader_type: Backend used to read the files. ``LoaderType``
                subclasses ``str``, so the plain string ``"pymupdf"``
                compares equal to ``LoaderType.PYMUPDF``.
        """
        if isinstance(file_path, (list, tuple)):
            # Copy the sequence so later changes to the caller's list do not
            # alter this instance. A tuple is treated like a list instead of
            # being wrapped as one bogus "path".
            self.file_path = list(file_path)
        else:
            self.file_path = [file_path]
        self.loader_type = loader_type

    async def aload(self) -> List[Document]:
        """Asynchronously load every configured file, one after another.

        Returns:
            The documents from all files that loaded successfully, in the
            order the files were given. Files that raise while loading are
            reported on stdout and skipped. An unsupported ``loader_type``
            is reported once and yields an empty list.
        """
        # The backend does not change between files, so check it once up
        # front and say why nothing was loaded instead of silently
        # returning an empty list.
        if self.loader_type != self.LoaderType.PYMUPDF:
            print(f"Unsupported loader type: {self.loader_type!r}")
            return []

        all_docs = []
        for file_path in self.file_path:
            try:
                docs = await PyMuPDFLoader(file_path).aload()
            except Exception as e:
                # Deliberate best-effort: report the failure and move on to
                # the next file.
                print(f"Error loading file {file_path}: {e}")
                continue
            all_docs.extend(docs)
        return all_docs