MahmoudAshraf committed on
Commit
4f7a07c
1 Parent(s): 4d9d54e

update python usage instructions

Browse files
Files changed (1) hide show
  1. README.md +9 -12
README.md CHANGED
@@ -177,6 +177,7 @@ pip install git+https://github.com/MahmoudAshraf97/ctc-forced-aligner.git
177
  ## Usage
178
 
179
  ```python
 
180
  from ctc_forced_aligner import (
181
  load_audio,
182
  load_alignment_model,
@@ -190,35 +191,32 @@ from ctc_forced_aligner import (
190
  audio_path = "your/audio/path"
191
  text_path = "your/text/path"
192
  language = "iso" # ISO-639-3 Language code
 
 
193
 
194
- audio_waveform = load_audio(audio_path, model.dtype, model.device)
195
-
196
- emissions, stride = generate_emissions(
197
- model, audio_waveform, args.window_size, args.context_size, args.batch_size
198
- )
199
-
200
- with open(text_path, "r") as f:
201
- lines = f.readlines()
202
- text = "".join(line for line in lines).replace("\n", " ").strip()
203
 
204
  alignment_model, alignment_tokenizer, alignment_dictionary = load_alignment_model(
205
  device,
206
  dtype=torch.float16 if device == "cuda" else torch.float32,
207
  )
208
 
 
 
 
 
 
 
209
 
210
  emissions, stride = generate_emissions(
211
  alignment_model, audio_waveform, batch_size=batch_size
212
  )
213
 
214
-
215
  tokens_starred, text_starred = preprocess_text(
216
  text,
217
  romanize=True,
218
  language=language,
219
  )
220
 
221
-
222
  segments, scores, blank_id = get_alignments(
223
  emissions,
224
  tokens_starred,
@@ -228,5 +226,4 @@ segments, scores, blank_id = get_alignments(
228
  spans = get_spans(tokens_starred, segments, alignment_tokenizer.decode(blank_id))
229
 
230
  word_timestamps = postprocess_results(text_starred, spans, stride, scores)
231
-
232
  ```
 
177
  ## Usage
178
 
179
  ```python
180
+ import torch
181
  from ctc_forced_aligner import (
182
  load_audio,
183
  load_alignment_model,
 
191
  audio_path = "your/audio/path"
192
  text_path = "your/text/path"
193
  language = "iso" # ISO-639-3 Language code
194
+ device = "cuda" if torch.cuda.is_available() else "cpu"
195
+ batch_size = 16
196
 
 
 
 
 
 
 
 
 
 
197
 
198
  alignment_model, alignment_tokenizer, alignment_dictionary = load_alignment_model(
199
  device,
200
  dtype=torch.float16 if device == "cuda" else torch.float32,
201
  )
202
 
203
+ audio_waveform = load_audio(audio_path, alignment_model.dtype, alignment_model.device)
204
+
205
+
206
+ with open(text_path, "r") as f:
207
+ lines = f.readlines()
208
+ text = "".join(line for line in lines).replace("\n", " ").strip()
209
 
210
  emissions, stride = generate_emissions(
211
  alignment_model, audio_waveform, batch_size=batch_size
212
  )
213
 
 
214
  tokens_starred, text_starred = preprocess_text(
215
  text,
216
  romanize=True,
217
  language=language,
218
  )
219
 
 
220
  segments, scores, blank_id = get_alignments(
221
  emissions,
222
  tokens_starred,
 
226
  spans = get_spans(tokens_starred, segments, alignment_tokenizer.decode(blank_id))
227
 
228
  word_timestamps = postprocess_results(text_starred, spans, stride, scores)
 
229
  ```