voice-coe-data / app.py
mharakal's picture
Add slovak text to WebUI
05235e1
raw
history blame
7.04 kB
import gradio as gr
import os
# Access token for the Hugging Face Hub, taken from the environment
# (None when the HF_TOKEN variable is not set).
HF_TOKEN = os.environ.get('HF_TOKEN')

# Flagging callback bound to the "paulbauriegel/voice-coe-demo" dataset;
# submitted samples are handed to it via callback.flag() further below.
callback = gr.HuggingFaceDatasetSaver(
    HF_TOKEN,
    "paulbauriegel/voice-coe-demo",
)
# Prompt sentences the user is asked to read aloud, keyed by language code.
# Every language provides the same eight sentences in the same order, so
# index i in one language is the translation of index i in the others.
sentences = {
    'en': [
        "In winter, the dry leaves fly around in the air.",
        "The good old man broke through the ice with his horse and fell into the cold water.",
        "He always eats the eggs without salt and pepper.",
        "Good to know. Now I can fix the appointment.",
        "We find this approach is particularly effective at learning speech to text translation.",
        "Ever wonder what your Representative has been up to?",
        "The Wenker sheets are the data basis for Georg Wenker's language atlases",
        "At least she gets 7000 dollars in damages",
    ],
    'de': [
        "Im Winter fliegen die trocknen Blätter durch die Luft herum.",
        "Der gute alte Mann ist mit dem Pferde durch´s Eis gebrochen und in das kalte Wasser gefallen.",
        "Er isst die Eier immer ohne Salz und Pfeffer.",
        "Gut zu wissen. Jetzt kann ich den Termin vereinbaren.",
        "Wir haben festgestellt, dass dieser Ansatz besonders effektiv beim Erlernen der Sprache-zu-Text Übersetzung ist.",
        "Haben Sie sich jemals gefragt, was Ihr Abgeordneter so treibt?",
        "Die Wenkerbogen stellen die Datengrundlage für Georg Wenkers Sprachatlanten dar",
        "Zumindest bekommt sie 7000 Dollar Schmerzensgeld",
    ],
    'ru': [
        "Зимой сухие листья кружатся в воздухе.",
        "Старик провалился под лед на своем коне и упал в холодную воду.",
        "Он всегда ест яйца без соли и перца.",
        # Fixed: the space after the full stop was missing ("информация.Теперь").
        "Это важная информация. Теперь я могу назначить встречу.",
        "Мы считаем этот подход особенно эффективным при обучении переводу речи в текст.",
        "Вы когда-нибудь задумывались, чем занимается ваш представитель?",
        "Листы Венкера являются основой данных для языковых атласов Георга Венкера.",
        "По крайней мере, она получает 7000 долларов в качестве возмещения ущерба.",
    ],
    'sk': [
        "V zime lietajú suché listy vzduchom.",
        "Starček prerazil ľad so svojím koňom a spadol do studenej vody.",
        "Vajcia vždy konzumuje bez soli a korenia.",
        "Je dobré vedieť, že si teraz môžem dohodnúť stretnutie.",
        "Zistili sme, že tento prístup je obzvlášť efektívny pri učení sa prekladu reči do textu.",
        "Premýšľali ste niekedy, čo chystá váš poslanec?",
        "Wenkerove hárky predstavujú základ dát jazykových atlasov Georga Wenkera",
        "Aspoň dostane ako bolestné najmenej 7000 dolárov",
    ],
}
# Web UI: the user picks a language, records one audio sample per prompt
# sentence, answers the accent question, gives consent and submits.
with gr.Blocks(title='Voice CoE Data Collection') as demo:
    # Fixed: the heading previously ended with a second opening <center>
    # tag instead of </center>, leaving the element unclosed.
    _ = gr.HTML('<center><h1>CoE Voice Data Collection</h1></center>')
    lang = gr.Dropdown(
        ["de", "en", "ru", "sk"],
        value='en',
        interactive=True,
        label="Choose your language",
    )
    # Hidden component; it occupies the first slot of the flagged record,
    # which submit_data fills with the requesting client's address.
    client_ip = gr.Label("", label="User-IP", visible=False)

    # One row per prompt sentence: the text to read plus a microphone
    # recorder. English is shown first, matching the dropdown default.
    labels = []
    audios = []
    for prompt in sentences['en']:
        with gr.Row():
            labels.append(gr.Label(prompt, label=""))
            audios.append(
                gr.Audio(source="microphone", type="filepath", label="Record sample")
            )

    with gr.Row():
        acc = gr.Dropdown(
            ["yes", "no", "maybe"],
            label="Do you have an accent in the spoken language",
        )
    with gr.Row():
        agree = gr.Checkbox(
            value=False,
            label='I agree that my data is stored and analysed by the iHub CoE Voice Team',
        )
    with gr.Row():
        btn = gr.Button("Submit data")
    # Status message shown after a submit attempt.
    # NOTE(review): the original indentation was lost, so whether this
    # component sat inside the button row cannot be told -- confirm layout.
    thx = gr.HTML('')

    def show_prompts(code):
        """Return a {label component: sentence} mapping for language ``code``."""
        return dict(zip(labels, sentences[code]))

    # Swap every prompt label when the language selection changes.
    lang.change(show_prompts, lang, labels)

    # This needs to be called at some point prior to the first call to callback.flag()
    callback.setup([client_ip, lang, *audios, acc], "flagged_data_points")

    def submit_data(client_ip, lang, audio_0, audio_1, audio_2, audio_3,
                    audio_4, audio_5, audio_6, audio_7, acc, agree,
                    request: gr.Request):
        """Store one submission through the flagging callback.

        Nothing is stored unless ``agree`` is truthy. The ``client_ip``
        input value is not used; the stored address is read from
        ``request.client.host`` instead.

        Returns:
            An HTML snippet reporting whether the data was submitted.
        """
        if not agree:
            return '<h3>No data has been submitted</h3>'
        client_ip_d = {'ip': request.client.host}
        # The consent flag itself is not part of the stored record.
        callback.flag([client_ip_d, lang, audio_0, audio_1, audio_2, audio_3,
                       audio_4, audio_5, audio_6, audio_7, acc])
        # Fixed typo in the user-facing message ("you data" -> "your data").
        return '<h3>Thank you for submitting your data</h3>'

    btn.click(
        submit_data,
        [client_ip, lang, *audios, acc, agree],
        thx,
        preprocess=False,
    )

demo.launch()