Alyosha11
/

Phoneme

Alyosha11 committed on May 10

Commit

5a5eaa3

•

1 Parent(s): 9c642b1

Upload parallel_phonemize.sh with huggingface_hub

Files changed (1) hide show

parallel_phonemize.sh ADDED Viewed

+#!/bin/bash
+input_dir="txt"
+output_dir="sangraha_hi_phonemized"
+lang=hi
+num_files=50000
+num_jobs=-1
+process_file() {
+    input_file="$1"
+    output_file="$2"
+    lang=hi
+    # Create the output directory and its parent directories if they don't exist
+    mkdir -p "$(dirname "$output_file")"
+    phonemize --quiet -l $lang "$input_file" -o "$output_file" --strip --language-switch remove-flags --preserve-punctuation
+    echo "Processed: $input_file -> $output_file"
+}
+export -f process_file
+# Start the timer
+start_time=$(date +%s)
+# Use GNU Parallel with find to process files in parallel
+find "$input_dir" -type f -name "*.txt" | head -n $num_files | parallel -j $num_jobs process_file "{}" "${output_dir}/phn_$(basename {})"
+# End the timer
+end_time=$(date +%s)
+# Calculate the elapsed time
+elapsed_time=$((end_time - start_time))
+# Convert elapsed time to minutes and seconds
+minutes=$((elapsed_time / 60))
+seconds=$((elapsed_time % 60))
+# Print the benchmark results
+echo "Benchmark Results:"
+echo "Number of files processed: $num_files"
+echo "Number of parallel jobs: $num_jobs"
+echo "Elapsed time: $minutes minutes $seconds seconds"