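"""Gradio Space for crowdsourcing Oshiwambo speech greetings.

Contributors record greetings in Oshindonga or Oshikwanyama; each recording and its
metadata are uploaded to the meyabase/crowd-oshiwambo-speech-greetings dataset on the
Hugging Face Hub.
"""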
import os
import random

import gradio as gr
import scipy.io.wavfile as wavf
from huggingface_hub import Repository, upload_file

from utils import get_unique_name, dump_json, read_json_lines
HF_TOKEN = os.environ.get("HF_TOKEN")
GREETINGS_DIR = './greetings'
greeting_files = [f.name for f in os.scandir(GREETINGS_DIR)]
DATASET_REPO_URL = "https://huggingface.co/datasets/meyabase/crowd-oshiwambo-speech-greetings"
REPOSITORY_DIR = "data"
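# Recordings are staged under LOCAL_DIR before being uploaded to the dataset repo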
LOCAL_DIR = 'data_local'
os.makedirs(LOCAL_DIR,exist_ok=True)
GENDER = ['Choose Gender','Male','Female','Other','Prefer not to say']
#------------------Work on Languages--------------------
languages = ["oshindonga", "oshikwanyama"]
language_id = ["ng","kj"]
#------------------Work on Languages--------------------
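# Local clone of the dataset repo; it is pulled again whenever recordings are read back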
repo = Repository(
    local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
)
repo.git_pull()

with open('app.css', 'r') as f:
    BLOCK_CSS = f.read()

def save_record(language, record, greeting, gender, accent, greeting_history, current_greeting, done_recording):
    # Set defaults for the session state on the first submission
    greeting_history = greeting_history if greeting_history is not None else [0]
    current_greeting = current_greeting if current_greeting is not None else 0  # 0 is the default greeting
    done_recording = done_recording if done_recording is not None else False

    # Save the greeting and its corresponding recording
    speaker_metadata = {}
    speaker_metadata['gender'] = gender if gender != GENDER[0] else ''
    speaker_metadata['accent'] = accent if accent != '' else ''
    default_record = None
    if not done_recording:
        if language is not None and language != 'Choose language' and record is not None and greeting is not None:
            language = language.lower()
            lang_id = language_id[languages.index(language)]

            # Write the audio to file
            audio_name = get_unique_name()
            SAVE_FILE_DIR = os.path.join(LOCAL_DIR, audio_name)
            os.makedirs(SAVE_FILE_DIR, exist_ok=True)
            audio_output_filename = os.path.join(SAVE_FILE_DIR, 'audio.wav')
            wavf.write(audio_output_filename, record[0], record[1])  # record = (sample_rate, samples)

            # Write metadata.jsonl to file
            json_file_path = os.path.join(SAVE_FILE_DIR, 'metadata.jsonl')
            metadata = {
                'id': audio_name,
                'file_name': 'audio.wav',
                'language_name': language,
                'language_id': lang_id,
                'greeting': current_greeting,
                'frequency': record[0],  # sampling rate of the recording
                'gender': speaker_metadata['gender'],
                'accent': speaker_metadata['accent'],
            }
            dump_json(metadata, json_file_path)
            # Upload the audio file
            repo_audio_path = os.path.join(REPOSITORY_DIR, audio_name, 'audio.wav')
            _ = upload_file(
                path_or_fileobj=audio_output_filename,
                path_in_repo=repo_audio_path,
                repo_id='meyabase/crowd-oshiwambo-speech-greetings',
                repo_type='dataset',
                token=HF_TOKEN,
            )

            # Upload the metadata file
            repo_json_path = os.path.join(REPOSITORY_DIR, audio_name, 'metadata.jsonl')
            _ = upload_file(
                path_or_fileobj=json_file_path,
                path_in_repo=repo_json_path,
                repo_id='meyabase/crowd-oshiwambo-speech-greetings',
                repo_type='dataset',
                token=HF_TOKEN,
            )
            output = 'Recording successfully saved! On to the next one...'

            # Choose the next greeting: each language has three greeting images (0-2),
            # so pick one the contributor has not recorded yet.
            greeting_history.append(current_greeting)
            greeting_choices = [i for i in range(3) if i not in greeting_history]
            if greeting_choices:
                next_greeting = random.choice(greeting_choices)
                next_greeting_image = f'greetings/{language}/{next_greeting}.png'
            else:
                done_recording = True
                next_greeting = 0
                next_greeting_image = 'greetings/best.gif'
                output = "You have finished all recordings! You can reload to start again."

            output_string = "<html> <body> <div class='output' style='color:green; font-size:13px'>" + output + "</div> </body> </html>"
            return output_string, next_greeting_image, greeting_history, next_greeting, done_recording, default_record
        # The submission was incomplete; report what is missing
        if greeting is None:
            output = "Greeting must be specified!"
        if record is None:
            output = "No recording found!"
        if language is None or language == 'Choose language':
            output = 'Language must be specified!'
        output_string = "<html> <body> <div class='output' style='color:green; font-size:13px'>" + output + "</div> </body> </html>"
        # Return the previous image and state unchanged
        return output_string, greeting, greeting_history, current_greeting, done_recording, default_record
    else:
        # Stop accepting submissions once all greetings have been recorded (best.gif is displayed)
        output = '🙌 You have finished all recordings! Thank you. You can reload to start again.'
        output_string = "<div class='finished'>" + output + "</div>"
        next_greeting = 0  # the default greeting
        next_greeting_image = 'greetings/best.gif'
        return output_string, next_greeting_image, greeting_history, next_greeting, done_recording, default_record
def get_metadata_json(path):
    try:
        return read_json_lines(path)[0]
    except Exception:
        return []
def get_metadata_of_dataset():
    repo.git_pull()
    REPOSITORY_DATA_DIR = os.path.join(REPOSITORY_DIR, 'data')
    repo_recordings = [os.path.join(REPOSITORY_DATA_DIR, f.name) for f in os.scandir(REPOSITORY_DATA_DIR)] if os.path.isdir(REPOSITORY_DATA_DIR) else []
    metadata_all = [get_metadata_json(os.path.join(f, 'metadata.jsonl')) for f in repo_recordings]
    metadata_all = [m for m in metadata_all if m != []]
    return metadata_all
def display_records():
    repo.git_pull()
    REPOSITORY_DATA_DIR = os.path.join(REPOSITORY_DIR, 'data')
    repo_recordings = [os.path.join(REPOSITORY_DATA_DIR, f.name) for f in os.scandir(REPOSITORY_DATA_DIR)] if os.path.isdir(REPOSITORY_DATA_DIR) else []

    # Resolve local clone paths to public URLs on the Hub
    audio_repo = [os.path.join(f, 'audio.wav') for f in repo_recordings]
    audio_repo = [a.replace('data/data/', 'https://huggingface.co/datasets/meyabase/crowd-oshiwambo-speech-greetings/resolve/main/data/') for a in audio_repo]
    metadata_repo = [read_json_lines(os.path.join(f, 'metadata.jsonl'))[0] for f in repo_recordings]

    audios_all = audio_repo
    metadata_all = metadata_repo

    langs = [m['language_name'] for m in metadata_all]
    audios = audios_all
    texts = [m.get('text', '') for m in metadata_all]  # not every recording has a transcription
    greetings = [m['greeting'] for m in metadata_all]

    html = f"""<div class="infoPoint">
    <h1> Hooray! We have collected {len(metadata_all)} samples!</h1>
    <table style="width:100%; text-align:center">
        <tr>
            <th>language</th>
            <th>audio</th>
            <th>greeting</th>
            <th>text</th>
        </tr>"""
    for lang, audio, text, greet_ in zip(langs, audios, texts, greetings):
        html += f"""<tr>
            <td>{lang}</td>
            <td><audio controls><source src="{audio}" type="audio/wav"> </audio></td>
            <td>{greet_}</td>
            <td>{text}</td>
        </tr>"""
    html += "</table></div>"
    return html
markdown = """<div style="text-align: center"><p style="font-size: 40px"> 🔊 <b>Oshiwambo Speech Greetings</b></p> <br>
This is a platform for contributing your Oshiwambo greetings for the speech recognition task. <br> </div>"""

record_markdown = """
<br> Record greetings in your language and help us build a dataset for speech recognition in Oshiwambo. <br>
"""

# Interface design begins
block = gr.Blocks(css=BLOCK_CSS)

with block:
    gr.Markdown(markdown)
    with gr.Tabs():
        with gr.TabItem('Record'):
            gr.Markdown(record_markdown)

            with gr.Row():
                language = gr.inputs.Dropdown(choices=sorted([lang_.title() for lang_ in list(languages)]), label="Choose language", default=languages[0].title())
                gender = gr.inputs.Dropdown(choices=GENDER, type="value", default=None, label="Gender (optional)")
                accent = gr.inputs.Textbox(label="Accent (optional)", default='', placeholder="e.g. oshikwanyama, oshindonga, oshimbadja, oshingadjera, etc.")

            # Show the default greeting image for the default language
            greeting = gr.Image(f'greetings/{languages[0].lower()}/0.png', image_mode="L")
            greeting_history = gr.Variable()  # stores the history of greetings

            record = gr.Audio(source="microphone", label='Record your voice')
            output_result = gr.outputs.HTML()

            state = gr.Variable()
            current_greeting = gr.Variable()
            done_recording = gr.Variable()  # signals when to stop accepting submissions even if `Submit` is clicked

            save = gr.Button("Submit")
            save.click(
                save_record,
                inputs=[language, record, greeting, gender, accent, state, current_greeting, done_recording],
                outputs=[output_result, greeting, state, current_greeting, done_recording, record],
            )

block.launch()