law-bot / make_corpus.py
anpigon's picture
chore: Add evaluation data files
3e40865
raw
history blame contribute delete
No virus
523 Bytes
import pickle
from langchain.schema import Document
from autorag.data.corpus import langchain_documents_to_parquet
# Build the evaluation corpus: load the pickled case batches, wrap every
# sentence of the first MAX_BATCHES batches in a langchain Document, and
# write the collected documents to a parquet file.

# Upper bound on how many batches are converted into corpus documents.
MAX_BATCHES = 100

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only run this against a cases.pkl that comes from a trusted source.
# NOTE(review): the directory name in this path looks mis-encoded -- confirm
# it matches the directory on disk before relying on it.
with open("/Users/anpigon/Documents/Embed/ᄇα…₯ᆸ원ᄑᅑᆫ례/cases.pkl", "rb") as file:
    data = pickle.load(file)

print(len(data))  # author's note: 2736 batches in total

docs = []
# Clamp to the number of batches actually loaded so that a smaller pickle
# does not fail on a missing index.
for i in range(min(MAX_BATCHES, len(data))):
    # Only element 1 of each batch is read; presumably it is an iterable of
    # sentence strings -- TODO confirm against the code that wrote cases.pkl.
    for sentence in data[i][1]:
        print(sentence)
        doc = Document(page_content=sentence)
        docs.append(doc)

langchain_documents_to_parquet(docs, "evaluation/data/corpus.parquet")