File size: 523 Bytes
3e40865 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
import pickle
from langchain.schema import Document
from autorag.data.corpus import langchain_documents_to_parquet
# Build the evaluation corpus: load pre-batched court-case sentences from a
# pickle, wrap each sentence in a LangChain Document, and write the result to
# a parquet file via AutoRAG's converter.

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only run this against a cases.pkl that comes from a trusted source.
with open("/Users/anpigon/Documents/Embed/법원판례/cases.pkl", "rb") as file:
    data = pickle.load(file)

print(len(data))  # 2736 batches in total (per the original author's note)

# Only the leading batches are converted into the evaluation corpus.
MAX_BATCHES = 100
# Clamp to the real size so a pickle with fewer batches does not raise
# IndexError partway through the loop.
batch_count = min(MAX_BATCHES, len(data))

docs = []
for i in range(batch_count):
    # Element 1 of each batch is iterated as its sentences — presumably a
    # list of strings; TODO confirm against the code that produced cases.pkl.
    for sentence in data[i][1]:
        print(sentence)
        docs.append(Document(page_content=sentence))

langchain_documents_to_parquet(docs, "evaluation/data/corpus.parquet")
|