pere committed on
Commit
eec9b81
1 Parent(s): 19d8a14
generation_config.json CHANGED
@@ -165,7 +165,7 @@
165
  "<|yue|>": 50358,
166
  "<|zh|>": 50260
167
  },
168
- "language": "<|en|>",
169
  "max_initial_timestamp_index": 1,
170
  "max_length": 448,
171
  "no_timestamps_token_id": 50364,
 
165
  "<|yue|>": 50358,
166
  "<|zh|>": 50260
167
  },
168
+ "language": "<|no|>",
169
  "max_initial_timestamp_index": 1,
170
  "max_length": 448,
171
  "no_timestamps_token_id": 50364,
run_distillation_nodes.py CHANGED
@@ -610,7 +610,7 @@ def get_data_loader(
610
 
611
  data_loader = DataLoader(
612
  dataset,
613
- batch_size=batch_sizei //num_of_hosts,
614
  drop_last=drop_last,
615
  pin_memory=pin_memory,
616
  collate_fn=data_collator,
 
610
 
611
  data_loader = DataLoader(
612
  dataset,
613
+ batch_size=batch_size //num_of_hosts,
614
  drop_last=drop_last,
615
  pin_memory=pin_memory,
616
  collate_fn=data_collator,
tokenizer.json CHANGED
@@ -14503,7 +14503,7 @@
14503
  },
14504
  {
14505
  "SpecialToken": {
14506
- "id": "<|en|>",
14507
  "type_id": 0
14508
  }
14509
  },
@@ -14541,7 +14541,7 @@
14541
  },
14542
  {
14543
  "SpecialToken": {
14544
- "id": "<|en|>",
14545
  "type_id": 0
14546
  }
14547
  },
@@ -14586,22 +14586,22 @@
14586
  "<|endoftext|>"
14587
  ]
14588
  },
14589
- "<|en|>": {
14590
- "id": "<|en|>",
14591
  "ids": [
14592
- 50259
14593
  ],
14594
  "tokens": [
14595
- "<|en|>"
14596
  ]
14597
  },
14598
- "<|notimestamps|>": {
14599
- "id": "<|notimestamps|>",
14600
  "ids": [
14601
- 50364
14602
  ],
14603
  "tokens": [
14604
- "<|notimestamps|>"
14605
  ]
14606
  },
14607
  "<|startoftranscript|>": {
 
14503
  },
14504
  {
14505
  "SpecialToken": {
14506
+ "id": "<|no|>",
14507
  "type_id": 0
14508
  }
14509
  },
 
14541
  },
14542
  {
14543
  "SpecialToken": {
14544
+ "id": "<|no|>",
14545
  "type_id": 0
14546
  }
14547
  },
 
14586
  "<|endoftext|>"
14587
  ]
14588
  },
14589
+ "<|notimestamps|>": {
14590
+ "id": "<|notimestamps|>",
14591
  "ids": [
14592
+ 50364
14593
  ],
14594
  "tokens": [
14595
+ "<|notimestamps|>"
14596
  ]
14597
  },
14598
+ "<|no|>": {
14599
+ "id": "<|no|>",
14600
  "ids": [
14601
+ 50288
14602
  ],
14603
  "tokens": [
14604
+ "<|no|>"
14605
  ]
14606
  },
14607
  "<|startoftranscript|>": {