File size: 523 Bytes
3e40865
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import pickle
from langchain.schema import Document
from autorag.data.corpus import langchain_documents_to_parquet


# Build a corpus parquet file from a pickled collection of batches.
#
# The script loads the pickle, takes the first MAX_BATCHES batches, wraps every
# sentence found at index 1 of each batch in a langchain Document, and hands
# the resulting list to autorag's langchain_documents_to_parquet.

# Absolute path of the input pickle (machine-specific; adjust before running
# anywhere other than the original author's environment).
CASES_PICKLE_PATH = "/Users/anpigon/Documents/Embed/법원판례/cases.pkl"
# Upper bound on how many leading batches are converted.
MAX_BATCHES = 100
# Destination of the generated corpus file.
CORPUS_PARQUET_PATH = "evaluation/data/corpus.parquet"

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only run this against a pickle you created yourself or otherwise trust.
with open(CASES_PICKLE_PATH, "rb") as file:
    data = pickle.load(file)

print(len(data))  # the original author observed 2736 batches in total

# Clamp to the number of available batches so that a dataset with fewer than
# MAX_BATCHES entries is processed in full instead of raising IndexError.
batch_count = min(MAX_BATCHES, len(data))

docs = []
for batch_index in range(batch_count):
    # Index 1 of each batch is iterated as that batch's sentences
    # (presumably a list of strings — TODO confirm against the pickle schema).
    for sentence in data[batch_index][1]:
        print(sentence)
        docs.append(Document(page_content=sentence))

langchain_documents_to_parquet(docs, CORPUS_PARQUET_PATH)