|
import spaces |
|
import gradio as gr |
|
from transformers import MT5ForConditionalGeneration, MT5Tokenizer,T5ForConditionalGeneration, T5Tokenizer |
|
|
|
# Registry of selectable translators: the key is the label offered in the UI
# dropdown, the value is the checkpoint id handed to `from_pretrained`.
# Insertion order is the order the choices are listed in.
models = {
    "finetuned mt5-base": "alakxender/mt5-base-dv-en",
    "madlad400-3b": "google/madlad400-3b-mt",
    "madlad400-10b": "google/madlad400-10b-mt",
}
|
|
|
def tranlate(text: str, model_name: str):
    """Validate a translation request and route it to the matching backend.

    Args:
        text: Source text typed into the input textbox.
        model_name: Key of the module-level ``models`` mapping.

    Returns:
        The translated string from the selected backend, or an empty string
        when ``text`` is ``None``, empty, or only whitespace.

    Raises:
        gr.Error: If ``text`` is longer than 2000 characters, if no model is
            selected, or if ``model_name`` is not a key of ``models``.
    """
    # Nothing to translate: avoid loading a multi-GB checkpoint only to
    # decode an empty prompt (and avoid len(None) blowing up).
    if text is None or not text.strip():
        return ""

    if len(text) > 2000:
        raise gr.Error(f"Try smaller text, yours is {len(text)}. try to fit to 2000 chars.")

    if model_name is None:
        raise gr.Error("huh! not sure what to do without a model. select a model.")

    # Reject unknown names here so the backends never surface a raw KeyError
    # from the `models[model_name]` lookup.
    if model_name not in models:
        raise gr.Error(f"unknown model '{model_name}'. select one of: {', '.join(models)}.")

    # The fine-tuned checkpoint uses the MT5 classes; everything else in the
    # registry goes through the T5 path.
    if model_name == "finetuned mt5-base":
        return mt5_translate(text, model_name)
    return t5_tranlaste(text, model_name)
|
|
|
@spaces.GPU(duration=30)
def t5_tranlaste(text: str, model_name: str):
    """Translate ``text`` to English with a T5 checkpoint from ``models``.

    Args:
        text: Source text; it is prefixed with the ``<2en>`` tag before
            tokenization.
        model_name: Key of the module-level ``models`` mapping that selects
            the checkpoint to load.

    Returns:
        The decoded first generated sequence, with special tokens removed.

    Raises:
        KeyError: If ``model_name`` is not a key of ``models``.
    """
    checkpoint = models[model_name]

    # NOTE(review): model and tokenizer are re-loaded on every call; consider
    # caching if request latency matters - confirm it is compatible with the
    # `spaces.GPU` execution model first.
    model = T5ForConditionalGeneration.from_pretrained(checkpoint, device_map="auto")
    tokenizer = T5Tokenizer.from_pretrained(checkpoint)

    # "<2en>" is presumably the target-language tag the MADLAD-400
    # checkpoints expect in front of the source text - see the model card.
    encoded = tokenizer(f"<2en> {text}", return_tensors="pt").to(model.device)

    # Only cap the number of newly generated tokens. Passing `max_length` as
    # well is redundant: transformers gives `max_new_tokens` precedence and
    # logs a warning when both are set.
    outputs = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=2048,
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
def mt5_translate(text: str, model_name: str):
    """Translate ``text`` with an MT5 checkpoint from ``models``.

    Args:
        text: Source text, tokenized as-is (no prefix is added).
        model_name: Key of the module-level ``models`` mapping that selects
            the checkpoint to load.

    Returns:
        The decoded first generated sequence, with special tokens removed.

    Raises:
        KeyError: If ``model_name`` is not a key of ``models``.
    """
    checkpoint = models[model_name]

    # NOTE(review): model and tokenizer are re-loaded on every call; consider
    # caching them if request latency matters.
    model = MT5ForConditionalGeneration.from_pretrained(checkpoint)
    tokenizer = MT5Tokenizer.from_pretrained(checkpoint)

    inputs = tokenizer(text, return_tensors="pt")

    # Only cap the number of newly generated tokens. Passing `max_length` as
    # well is redundant: transformers gives `max_new_tokens` precedence and
    # logs a warning when both are set.
    result = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=2048,
    )

    return tokenizer.decode(result[0], skip_special_tokens=True)
|
|
|
# Stylesheet passed to the Gradio interface. The rule targets the <textarea>
# inside any component carrying the "textbox1" class: larger type, a stack of
# named fonts, and looser line spacing.
css = """
.textbox1 textarea {
    font-size: 18px !important;
    font-family: 'MV_Faseyha', 'Faruma', 'A_Faruma' !important;
    line-height: 1.8 !important;
}
"""
|
|
|
# Sample inputs shown under the form. Each sentence is offered once per
# registered model, grouped by sentence, in the order of `models`.
_sample_sentences = [
    "މާލޭގައި ފެންބޮޑުވާ މަގުތައް މަރާމާތު ކުރަން ފަށައިފި",
    "މިއަދު މެންދުރު 12:45 ހާއިރު މާލޭގެ ޝަހީދު އަލީ މިސްކިތް ސަރަހައްދުގައި ވެސް ވަނީ މާރާމާރީއެއް ހިންގައިފަ އެވެ.",
]
_example_rows = [
    [sentence, name]
    for sentence in _sample_sentences
    for name in models
]

# Input widgets, in the positional order `tranlate(text, model_name)` expects.
_source_box = gr.Textbox(
    lines=5,
    label="Enter Dhivehi Text",
    rtl=True,
    elem_classes="textbox1",
)
_model_picker = gr.Dropdown(
    choices=list(models.keys()),
    label="Select a model",
    value="finetuned mt5-base",
)

demo = gr.Interface(
    fn=tranlate,
    inputs=[_source_box, _model_picker],
    outputs=gr.Textbox(label="English Translation"),
    title="Dhivehi to English Translation",
    description="Translate Dhivehi text to English",
    examples=_example_rows,
    css=css,
)

# Start the web UI as soon as the module is executed.
demo.launch()
|
|