update
Browse files
- generation_config.json +1 -1
- run_distillation_nodes.py +1 -1
- tokenizer.json +10 -10
generation_config.json
CHANGED
@@ -165,7 +165,7 @@
|
|
165 |
"<|yue|>": 50358,
|
166 |
"<|zh|>": 50260
|
167 |
},
|
168 |
-
"language": "<|
|
169 |
"max_initial_timestamp_index": 1,
|
170 |
"max_length": 448,
|
171 |
"no_timestamps_token_id": 50364,
|
|
|
165 |
"<|yue|>": 50358,
|
166 |
"<|zh|>": 50260
|
167 |
},
|
168 |
+
"language": "<|no|>",
|
169 |
"max_initial_timestamp_index": 1,
|
170 |
"max_length": 448,
|
171 |
"no_timestamps_token_id": 50364,
|
run_distillation_nodes.py
CHANGED
@@ -610,7 +610,7 @@ def get_data_loader(
|
|
610 |
|
611 |
data_loader = DataLoader(
|
612 |
dataset,
|
613 |
-
batch_size=
|
614 |
drop_last=drop_last,
|
615 |
pin_memory=pin_memory,
|
616 |
collate_fn=data_collator,
|
|
|
610 |
|
611 |
data_loader = DataLoader(
|
612 |
dataset,
|
613 |
+
batch_size=batch_size //num_of_hosts,
|
614 |
drop_last=drop_last,
|
615 |
pin_memory=pin_memory,
|
616 |
collate_fn=data_collator,
|
tokenizer.json
CHANGED
@@ -14503,7 +14503,7 @@
|
|
14503 |
},
|
14504 |
{
|
14505 |
"SpecialToken": {
|
14506 |
-
"id": "<|
|
14507 |
"type_id": 0
|
14508 |
}
|
14509 |
},
|
@@ -14541,7 +14541,7 @@
|
|
14541 |
},
|
14542 |
{
|
14543 |
"SpecialToken": {
|
14544 |
-
"id": "<|
|
14545 |
"type_id": 0
|
14546 |
}
|
14547 |
},
|
@@ -14586,22 +14586,22 @@
|
|
14586 |
"<|endoftext|>"
|
14587 |
]
|
14588 |
},
|
14589 |
-
"<|
|
14590 |
-
"id": "<|
|
14591 |
"ids": [
|
14592 |
-
|
14593 |
],
|
14594 |
"tokens": [
|
14595 |
-
"<|
|
14596 |
]
|
14597 |
},
|
14598 |
-
"<|
|
14599 |
-
"id": "<|
|
14600 |
"ids": [
|
14601 |
-
|
14602 |
],
|
14603 |
"tokens": [
|
14604 |
-
"<|
|
14605 |
]
|
14606 |
},
|
14607 |
"<|startoftranscript|>": {
|
|
|
14503 |
},
|
14504 |
{
|
14505 |
"SpecialToken": {
|
14506 |
+
"id": "<|no|>",
|
14507 |
"type_id": 0
|
14508 |
}
|
14509 |
},
|
|
|
14541 |
},
|
14542 |
{
|
14543 |
"SpecialToken": {
|
14544 |
+
"id": "<|no|>",
|
14545 |
"type_id": 0
|
14546 |
}
|
14547 |
},
|
|
|
14586 |
"<|endoftext|>"
|
14587 |
]
|
14588 |
},
|
14589 |
+
"<|notimestamps|>": {
|
14590 |
+
"id": "<|notimestamps|>",
|
14591 |
"ids": [
|
14592 |
+
50364
|
14593 |
],
|
14594 |
"tokens": [
|
14595 |
+
"<|notimestamps|>"
|
14596 |
]
|
14597 |
},
|
14598 |
+
"<|no|>": {
|
14599 |
+
"id": "<|no|>",
|
14600 |
"ids": [
|
14601 |
+
50288
|
14602 |
],
|
14603 |
"tokens": [
|
14604 |
+
"<|no|>"
|
14605 |
]
|
14606 |
},
|
14607 |
"<|startoftranscript|>": {
|