Dmytro Vodianytskyi committed on
Commit
82c52c7
1 Parent(s): 5aa1ec5

Add application file

Browse files
Files changed (2) hide show
  1. app.py +60 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import T5Tokenizer, MT5ForConditionalGeneration
4
+
5
# Inference device: CUDA when torch reports an available GPU, CPU otherwise.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# The tokenizer is loaded from the 'google/mt5-small' checkpoint, while the
# model weights are loaded from "werent4/mt5TranslatorLT".
TOKENIZER = T5Tokenizer.from_pretrained('google/mt5-small')

# Load the weights and move them to DEVICE in a single expression;
# Module.to() returns the module itself, so MODEL is the loaded model.
MODEL = MT5ForConditionalGeneration.from_pretrained("werent4/mt5TranslatorLT").to(DEVICE)
9
+
10
def translate(text, mode, max_length, num_beams):
    """Translate ``text`` between English and Lithuanian.

    Uses the module-level ``TOKENIZER``, ``MODEL`` and ``DEVICE``.

    Args:
        text: Source text to translate.
        mode: ``"En2Lt"`` selects English -> Lithuanian; any other value
            selects Lithuanian -> English.
        max_length: Token limit, applied both when truncating the encoded
            prompt and as the cap on the generated sequence. Coerced to int.
        num_beams: Beam-search width. Coerced to int.

    Returns:
        The decoded translation with special tokens stripped, or an empty
        string when ``text`` is empty or whitespace-only.
    """
    # Nothing to translate: skip tokenization and beam search entirely instead
    # of generating output from the bare prompt prefix.
    if not text or not text.strip():
        return ""

    # The values are wired in from UI sliders and may arrive as floats, while
    # the tokenizer and generate() need integer lengths / beam counts.
    max_length = int(max_length)
    num_beams = int(num_beams)

    if mode == "En2Lt":
        prompt = f"translate English to Lithuanian: {text}"
    else:
        prompt = f"translate Lithuanian to English: {text}"

    encoded_input = TOKENIZER(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length,
    ).to(DEVICE)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output_tokens = MODEL.generate(
            **encoded_input,
            max_length=max_length,
            num_beams=num_beams,
            no_repeat_ngram_size=2,
            early_stopping=True,
        )

    return TOKENIZER.decode(output_tokens[0], skip_special_tokens=True)
23
+
24
+
25
# Gradio UI: generation controls, input/output text boxes, a mode selector and
# a collapsible snippet showing how to run the model locally.
with gr.Blocks() as interface:
    # Page title; the heading tag is closed so the markup is well-formed.
    gr.Markdown("<h1>Lt🔄En: Lithuanian to English and vice versa</h1>")

    # Generation controls. Both values are passed to translate() as token /
    # beam counts, so the sliders move in whole-number steps.
    with gr.Row():
        max_length = gr.Slider(1, 512, value=128, step=1, label="Max length", interactive=True)
        num_beams = gr.Slider(1, 16, value=5, step=1, label="Num beams", interactive=True)

    with gr.Row():
        input_text = gr.Textbox(label="Text input", placeholder="Enter your text here")
        with gr.Column():
            # An explicit initial value keeps the selected direction visible;
            # translate() treats anything other than "En2Lt" as Lt -> En.
            mode = gr.Dropdown(label="Mode", choices=["En2Lt", "Lt2En"], value="En2Lt")
            translate_button = gr.Button("Translate")
        output_text = gr.Textbox(label="Translated text")

    with gr.Accordion("How to run the model locally:", open=False):
        gr.Code("""import torch
from transformers import T5Tokenizer, MT5ForConditionalGeneration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = T5Tokenizer.from_pretrained('google/mt5-small')
model = MT5ForConditionalGeneration.from_pretrained("werent4/mt5TranslatorLT")
model.to(device)
def translate(text, model, tokenizer, device):
    input_text = f"translate English to Lithuanian: {text}"
    encoded_input = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
    with torch.no_grad():
        output_tokens = model.generate(
            **encoded_input,
            max_length=128,
            num_beams=5,
            no_repeat_ngram_size=2,
            early_stopping=True
        )
    translated_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    return translated_text
text = "I live in Kaunas"
translate(text, model, tokenizer, device)
""", language='python')

    # Event listeners are registered while the Blocks context is still open.
    translate_button.click(
        fn=translate,
        inputs=[input_text, mode, max_length, num_beams],
        outputs=[output_text],
    )

interface.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ transformers
3
+ sentencepiece