bbaguette
- eval.py +1 -1
- run_speech_recognition_ctc.py +1 -1
eval.py
CHANGED
@@ -102,7 +102,7 @@ def main(args):
     # map function to decode audio
     def map_to_pred(batch):
         prediction = asr(
-            batch["audio"]["array"], chunk_length_s=args.chunk_length_s, stride_length_s=args.stride_length_s
+            batch["audio"]["array"], chunk_length_s=args.chunk_length_s, stride_length_s=args.stride_length_s
         )

         batch["prediction"] = prediction["text"]  # "".join(prediction["text"].split("<s>"))
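For context, the touched line is the call into the `transformers` automatic-speech-recognition pipeline: `chunk_length_s` and `stride_length_s` make the pipeline split long recordings into overlapping chunks and stitch the chunk transcriptions back together. Below is a minimal sketch of how `map_to_pred` is typically wired into an evaluation run; the model and dataset identifiers and the chunking values are placeholders standing in for whatever eval.py receives through `args`, not the values used in this repository.

# Sketch of the surrounding evaluation flow (assumed context, not the exact eval.py).
from datasets import load_dataset
from transformers import pipeline

# Placeholder model/dataset; the real script builds these from command-line args.
asr = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")


def map_to_pred(batch):
    # chunk_length_s/stride_length_s enable chunked long-form inference with
    # overlapping strides, so audio longer than the model's window still decodes.
    # The raw array is assumed to already be at the model's sampling rate (16 kHz here).
    prediction = asr(
        batch["audio"]["array"], chunk_length_s=30, stride_length_s=5
    )
    batch["prediction"] = prediction["text"]
    return batch


result = dataset.map(map_to_pred)
print(result["prediction"][:2])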
run_speech_recognition_ctc.py
CHANGED
@@ -643,7 +643,7 @@ def main():

         pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id

-        pred_str = tokenizer.batch_decode(pred_ids
+        pred_str = tokenizer.batch_decode(pred_ids)
         # we do not want to group tokens when computing the metrics
         label_str = tokenizer.batch_decode(pred.label_ids, group_tokens=False)

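For context, the fixed line (a missing closing parenthesis) sits in the training script's `compute_metrics` callback: CTC logits are greedily argmax-decoded, the `-100` padding in the labels is swapped back to the tokenizer's pad token before decoding, and `group_tokens=False` keeps repeated characters in the reference text. A hedged sketch of what that callback typically looks like follows; the metric object, the tokenizer loading, and the return shape are illustrative assumptions rather than the exact code of run_speech_recognition_ctc.py.

# Illustrative compute_metrics for CTC fine-tuning; only the batch_decode lines
# are taken from the diff above, the rest is an assumed standard setup.
import numpy as np
import evaluate
from transformers import AutoTokenizer

wer_metric = evaluate.load("wer")  # placeholder metric choice
tokenizer = AutoTokenizer.from_pretrained("facebook/wav2vec2-base-960h")  # placeholder


def compute_metrics(pred):
    # Greedy CTC decoding: most likely token at every frame; the tokenizer
    # collapses repeats and strips the blank/pad token during decode.
    pred_ids = np.argmax(pred.predictions, axis=-1)

    # -100 marks ignored label positions for the loss; restore the pad token
    # so the tokenizer can decode the reference transcriptions.
    pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id

    pred_str = tokenizer.batch_decode(pred_ids)
    # we do not want to group tokens when computing the metrics
    label_str = tokenizer.batch_decode(pred.label_ids, group_tokens=False)

    return {"wer": wer_metric.compute(predictions=pred_str, references=label_str)}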