meg-huggingface committed on
Commit
a52c513
1 Parent(s): e0ada71

Adding another check to see if live before computing dset peek

Browse files
data_measurements/dataset_statistics.py CHANGED
@@ -554,11 +554,12 @@ class DatasetStatisticsCacheClass:
554
  with open(self.dset_peek_json_fid, "r") as f:
555
  self.dset_peek = json.load(f)["dset peek"]
556
  else:
557
- if self.dset is None:
558
- self.get_base_dataset()
559
- self.dset_peek = self.dset[:100]
560
- if save:
561
- write_json({"dset peek": self.dset_peek}, self.dset_peek_json_fid)
 
562
 
563
  def load_or_prepare_tokenized_df(self, save=True):
564
  if self.use_cache and exists(self.tokenized_df_fid):
 
554
  with open(self.dset_peek_json_fid, "r") as f:
555
  self.dset_peek = json.load(f)["dset peek"]
556
  else:
557
+ if not self.live:
558
+ if self.dset is None:
559
+ self.get_base_dataset()
560
+ self.dset_peek = self.dset[:100]
561
+ if save:
562
+ write_json({"dset peek": self.dset_peek}, self.dset_peek_json_fid)
563
 
564
  def load_or_prepare_tokenized_df(self, save=True):
565
  if self.use_cache and exists(self.tokenized_df_fid):