Update README.md
Browse files
README.md
CHANGED
@@ -189,7 +189,7 @@ To each task corresponds a sequence of tokens that are given to the decoder as *
|
|
189 |
|
190 |
# Usage
|
191 |
|
192 |
-
To transcribe or translate audio files, the model has to be used along a `
|
193 |
|
194 |
|
195 |
## Transcription
|
@@ -238,7 +238,7 @@ transcription.
|
|
238 |
>>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
|
239 |
>>> ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
|
240 |
>>> input_speech = next(iter(ds))["audio"]["array"]
|
241 |
-
>>>
|
242 |
>>> input_features = processor(input_speech, return_tensors="pt").input_features
|
243 |
>>> predicted_ids = model.generate(input_features)
|
244 |
>>> transcription = processor.batch_decode(predicted_ids)
|
@@ -268,7 +268,7 @@ The "<|translate|>" is used as the first decoder input token to specify the tran
|
|
268 |
>>> input_speech = next(iter(ds))["audio"]["array"]
|
269 |
>>> # tokenize
|
270 |
>>> input_features = processor(input_speech, return_tensors="pt").input_features
|
271 |
-
>>> forced_decoder_ids = processor.
|
272 |
|
273 |
>>> predicted_ids = model.generate(input_features, forced_decoder_ids = forced_decoder_ids)
|
274 |
>>> transcription = processor.batch_decode(predicted_ids, skip_special_tokens = True)
|
|
|
189 |
|
190 |
# Usage
|
191 |
|
192 |
+
To transcribe or translate audio files, the model has to be used alongside a `WhisperProcessor`. The `WhisperProcessor.get_decoder_prompt_ids` function is used to get a list of `(idx, token)` tuples, which can either be set in the config or passed directly to the `generate` function as the `forced_decoder_ids` argument.
|
193 |
|
194 |
|
195 |
## Transcription
|
|
|
238 |
>>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
|
239 |
>>> ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
|
240 |
>>> input_speech = next(iter(ds))["audio"]["array"]
|
241 |
+
>>> model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(language = "fr", task = "transcribe")
|
242 |
>>> input_features = processor(input_speech, return_tensors="pt").input_features
|
243 |
>>> predicted_ids = model.generate(input_features)
|
244 |
>>> transcription = processor.batch_decode(predicted_ids)
|
|
|
268 |
>>> input_speech = next(iter(ds))["audio"]["array"]
|
269 |
>>> # tokenize
|
270 |
>>> input_features = processor(input_speech, return_tensors="pt").input_features
|
271 |
+
>>> forced_decoder_ids = processor.get_decoder_prompt_ids(language = "fr", task = "translate")
|
272 |
|
273 |
>>> predicted_ids = model.generate(input_features, forced_decoder_ids = forced_decoder_ids)
|
274 |
>>> transcription = processor.batch_decode(predicted_ids, skip_special_tokens = True)
|