Spaces:
Build error
Build error
meg-huggingface
committed on
Commit
•
a52c513
1
Parent(s):
e0ada71
Adding another check to see if live before computing dset peek
Browse files
data_measurements/dataset_statistics.py
CHANGED
@@ -554,11 +554,12 @@ class DatasetStatisticsCacheClass:
|
|
554 |
with open(self.dset_peek_json_fid, "r") as f:
|
555 |
self.dset_peek = json.load(f)["dset peek"]
|
556 |
else:
|
557 |
-
if self.
|
558 |
-
self.
|
559 |
-
|
560 |
-
|
561 |
-
|
|
|
562 |
|
563 |
def load_or_prepare_tokenized_df(self, save=True):
|
564 |
if self.use_cache and exists(self.tokenized_df_fid):
|
|
|
554 |
with open(self.dset_peek_json_fid, "r") as f:
|
555 |
self.dset_peek = json.load(f)["dset peek"]
|
556 |
else:
|
557 |
+
if not self.live:
|
558 |
+
if self.dset is None:
|
559 |
+
self.get_base_dataset()
|
560 |
+
self.dset_peek = self.dset[:100]
|
561 |
+
if save:
|
562 |
+
write_json({"dset peek": self.dset_peek}, self.dset_peek_json_fid)
|
563 |
|
564 |
def load_or_prepare_tokenized_df(self, save=True):
|
565 |
if self.use_cache and exists(self.tokenized_df_fid):
|