Update readme, `whisper-large` -> `whisper-large-v2` (#4)
Commit: 6827ce791b91ce7af083878b8a153bcd768c1c74
README.md CHANGED

@@ -174,8 +174,8 @@ The "<|en|>" token is used to specify that the speech is in english and should b
 >>> import torch

 >>> # load model and processor
->>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
->>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
+>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
+>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")

 >>> # load dummy dataset and read soundfiles
 >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
@@ -199,8 +199,8 @@ transcription.
 >>> import torch

 >>> # load model and processor
->>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
->>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
+>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
+>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")

 >>> # load dummy dataset and read soundfiles
 >>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
@@ -227,8 +227,8 @@ The "<|translate|>" is used as the first decoder input token to specify the tran
 >>> import torch

 >>> # load model and processor
->>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
->>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
+>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
+>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")

 >>> # load dummy dataset and read soundfiles
 >>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
@@ -245,7 +245,7 @@ The "<|translate|>" is used as the first decoder input token to specify the tran

 ## Evaluation

-This code snippet shows how to evaluate **openai/whisper-large** on LibriSpeech's "clean" and "other" test data.
+This code snippet shows how to evaluate **openai/whisper-large-v2** on LibriSpeech's "clean" and "other" test data.

 ```python
 >>> from datasets import load_dataset
@@ -257,8 +257,8 @@ This code snippet shows how to evaluate **openai/whisper-large** on LibriSpeech'

 >>> librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")

->>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large").to("cuda")
->>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
+>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2").to("cuda")
+>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")

 >>> def map_to_pred(batch):
 >>>     input_features = processor(batch["audio"]["array"], return_tensors="pt").input_features
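For context, here is a minimal end-to-end sketch using the renamed checkpoint. It stitches together the fragments visible in the diff above; the generation and decoding calls at the end are not part of this commit and are shown only as a plausible completion using the standard `transformers` API.

```python
>>> from datasets import load_dataset
>>> from transformers import WhisperProcessor, WhisperForConditionalGeneration

>>> # load model and processor under the updated checkpoint name
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")

>>> # load dummy dataset and read one sound file
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> sample = ds[0]["audio"]

>>> # convert the waveform to log-Mel input features, generate token ids, decode to text
>>> input_features = processor(sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt").input_features
>>> predicted_ids = model.generate(input_features)
>>> transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
```

Note that only the repository id changes between the two checkpoints; the `WhisperProcessor` and `WhisperForConditionalGeneration` classes and the rest of the README snippets are untouched by this commit.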