Spaces:

anpigon
/

law-bot

Runtime error

+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "question": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "ground_truth": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "answer": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "contexts": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    }
+  },
+  "homepage": "",
+  "license": ""
+}

evaluation_data/dataset/state.json ADDED Viewed

+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "0b75232bae9cb3e7",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}

make_corpus.py ADDED Viewed

"""Build a corpus parquet file from pickled batches of sentences.

Loads a pickle of batches, wraps every sentence of the first
``MAX_BATCHES`` batches in a langchain ``Document`` and hands the
documents to ``langchain_documents_to_parquet``.
"""
import pickle
from pathlib import Path

from langchain.schema import Document
from autorag.data.corpus import langchain_documents_to_parquet

# Machine-specific input path, kept exactly as the script originally had it.
CASES_PICKLE_PATH = "/Users/anpigon/Documents/Embed/법원판례/cases.pkl"
CORPUS_OUTPUT_PATH = "evaluation/data/corpus.parquet"
# Only the first MAX_BATCHES batches are converted into documents.
MAX_BATCHES = 100

# NOTE(security): pickle.load can execute arbitrary code while
# deserializing; only run this script against a pickle file you trust.
with open(CASES_PICKLE_PATH, "rb") as file:
    data = pickle.load(file)
print(len(data))  # 2736 batches in total (per the original author's note)

docs = []
# Clamp to the number of batches actually present so a smaller pickle does
# not raise IndexError.
for i in range(min(MAX_BATCHES, len(data))):
    # presumably data[i][1] is the batch's iterable of sentence strings —
    # TODO confirm against the code that produced cases.pkl
    for sentence in data[i][1]:
        print(sentence)
        docs.append(Document(page_content=sentence))

# Ensure the destination directory exists before the parquet file is written.
Path(CORPUS_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
langchain_documents_to_parquet(docs, CORPUS_OUTPUT_PATH)