MahmoudAshraf
committed on
Commit
•
4f7a07c
1
Parent(s):
4d9d54e
update python usage instructions
Browse files
README.md
CHANGED
@@ -177,6 +177,7 @@ pip install git+https://github.com/MahmoudAshraf97/ctc-forced-aligner.git
|
|
177 |
## Usage
|
178 |
|
179 |
```python
|
|
|
180 |
from ctc_forced_aligner import (
|
181 |
load_audio,
|
182 |
load_alignment_model,
|
@@ -190,35 +191,32 @@ from ctc_forced_aligner import (
|
|
190 |
audio_path = "your/audio/path"
|
191 |
text_path = "your/text/path"
|
192 |
language = "iso" # ISO-639-3 Language code
|
|
|
|
|
193 |
|
194 |
-
audio_waveform = load_audio(audio_path, model.dtype, model.device)
|
195 |
-
|
196 |
-
emissions, stride = generate_emissions(
|
197 |
-
model, audio_waveform, args.window_size, args.context_size, args.batch_size
|
198 |
-
)
|
199 |
-
|
200 |
-
with open(text_path, "r") as f:
|
201 |
-
lines = f.readlines()
|
202 |
-
text = "".join(line for line in lines).replace("\n", " ").strip()
|
203 |
|
204 |
alignment_model, alignment_tokenizer, alignment_dictionary = load_alignment_model(
|
205 |
device,
|
206 |
dtype=torch.float16 if device == "cuda" else torch.float32,
|
207 |
)
|
208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
|
210 |
emissions, stride = generate_emissions(
|
211 |
alignment_model, audio_waveform, batch_size=batch_size
|
212 |
)
|
213 |
|
214 |
-
|
215 |
tokens_starred, text_starred = preprocess_text(
|
216 |
text,
|
217 |
romanize=True,
|
218 |
language=language,
|
219 |
)
|
220 |
|
221 |
-
|
222 |
segments, scores, blank_id = get_alignments(
|
223 |
emissions,
|
224 |
tokens_starred,
|
@@ -228,5 +226,4 @@ segments, scores, blank_id = get_alignments(
|
|
228 |
spans = get_spans(tokens_starred, segments, alignment_tokenizer.decode(blank_id))
|
229 |
|
230 |
word_timestamps = postprocess_results(text_starred, spans, stride, scores)
|
231 |
-
|
232 |
```
|
|
|
177 |
## Usage
|
178 |
|
179 |
```python
|
180 |
+
import torch
|
181 |
from ctc_forced_aligner import (
|
182 |
load_audio,
|
183 |
load_alignment_model,
|
|
|
191 |
audio_path = "your/audio/path"
|
192 |
text_path = "your/text/path"
|
193 |
language = "iso" # ISO-639-3 Language code
|
194 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
195 |
+
batch_size = 16
|
196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
|
198 |
alignment_model, alignment_tokenizer, alignment_dictionary = load_alignment_model(
|
199 |
device,
|
200 |
dtype=torch.float16 if device == "cuda" else torch.float32,
|
201 |
)
|
202 |
|
203 |
+
audio_waveform = load_audio(audio_path, alignment_model.dtype, alignment_model.device)
|
204 |
+
|
205 |
+
|
206 |
+
with open(text_path, "r") as f:
|
207 |
+
lines = f.readlines()
|
208 |
+
text = "".join(line for line in lines).replace("\n", " ").strip()
|
209 |
|
210 |
emissions, stride = generate_emissions(
|
211 |
alignment_model, audio_waveform, batch_size=batch_size
|
212 |
)
|
213 |
|
|
|
214 |
tokens_starred, text_starred = preprocess_text(
|
215 |
text,
|
216 |
romanize=True,
|
217 |
language=language,
|
218 |
)
|
219 |
|
|
|
220 |
segments, scores, blank_id = get_alignments(
|
221 |
emissions,
|
222 |
tokens_starred,
|
|
|
226 |
spans = get_spans(tokens_starred, segments, alignment_tokenizer.decode(blank_id))
|
227 |
|
228 |
word_timestamps = postprocess_results(text_starred, spans, stride, scores)
|
|
|
229 |
```
|