jhj0517 committed on
Commit
e6158a2
1 Parent(s): 62488f6

add DeepL API translation

Browse files
Files changed (2) hide show
  1. app.py +32 -1
  2. modules/deepl_api.py +196 -0
app.py CHANGED
@@ -7,6 +7,7 @@ from modules.faster_whisper_inference import FasterWhisperInference
7
  from modules.nllb_inference import NLLBInference
8
  from ui.htmls import *
9
  from modules.youtube_manager import get_ytmetas
 
10
 
11
  class App:
12
  def __init__(self, args):
@@ -19,6 +20,7 @@ class App:
19
  print("Use Open AI Whisper implementation")
20
  print(f"Device \"{self.whisper_inf.device}\" is detected")
21
  self.nllb_inf = NLLBInference()
 
22
 
23
  @staticmethod
24
  def open_folder(folder_path: str):
@@ -152,7 +154,36 @@ class App:
152
  file_subs = gr.Files(type="filepath", label="Upload Subtitle Files to translate here",
153
  file_types=['.vtt', '.srt'])
154
 
155
- with gr.TabItem("NLLB"): # sub tab1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  with gr.Row():
157
  dd_nllb_model = gr.Dropdown(label="Model", value=self.nllb_inf.default_model_size,
158
  choices=self.nllb_inf.available_models)
 
7
  from modules.nllb_inference import NLLBInference
8
  from ui.htmls import *
9
  from modules.youtube_manager import get_ytmetas
10
+ from modules.deepl_api import DeepLAPI
11
 
12
  class App:
13
  def __init__(self, args):
 
20
  print("Use Open AI Whisper implementation")
21
  print(f"Device \"{self.whisper_inf.device}\" is detected")
22
  self.nllb_inf = NLLBInference()
23
+ self.deepl_api = DeepLAPI()
24
 
25
  @staticmethod
26
  def open_folder(folder_path: str):
 
154
  file_subs = gr.Files(type="filepath", label="Upload Subtitle Files to translate here",
155
  file_types=['.vtt', '.srt'])
156
 
157
+ with gr.TabItem("DeepL API"): # sub tab1
158
+ with gr.Row():
159
+ tb_authkey = gr.Textbox(label="Your Auth Key (API KEY)",
160
+ value="")
161
+ with gr.Row():
162
+ dd_deepl_sourcelang = gr.Dropdown(label="Source Language", value="Automatic Detection",
163
+ choices=list(
164
+ self.deepl_api.available_source_langs.keys()))
165
+ dd_deepl_targetlang = gr.Dropdown(label="Target Language", value="English",
166
+ choices=list(
167
+ self.deepl_api.available_target_langs.keys()))
168
+ with gr.Row():
169
+ cb_deepl_ispro = gr.Checkbox(label="Pro User?", value=False)
170
+ with gr.Row():
171
+ btn_run = gr.Button("TRANSLATE SUBTITLE FILE", variant="primary")
172
+ with gr.Row():
173
+ tb_indicator = gr.Textbox(label="Output", scale=4)
174
+ files_subtitles = gr.Files(label="Downloadable output file", scale=4)
175
+ btn_openfolder = gr.Button('📂', scale=1)
176
+
177
+ btn_run.click(fn=self.deepl_api.translate_deepl,
178
+ inputs=[tb_authkey, file_subs, dd_deepl_sourcelang, dd_deepl_targetlang,
179
+ cb_deepl_ispro],
180
+ outputs=[tb_indicator, files_subtitles])
181
+
182
+ btn_openfolder.click(fn=lambda: self.open_folder(os.path.join("outputs", "translations")),
183
+ inputs=None,
184
+ outputs=None)
185
+
186
+ with gr.TabItem("NLLB"): # sub tab2
187
  with gr.Row():
188
  dd_nllb_model = gr.Dropdown(label="Model", value=self.nllb_inf.default_model_size,
189
  choices=self.nllb_inf.available_models)
modules/deepl_api.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import time
3
+ import os
4
+ from datetime import datetime
5
+ import gradio as gr
6
+
7
+ from modules.subtitle_manager import *
8
+
9
+ """
10
+ This is written with reference to the DeepL API documentation.
11
+ For details on the DeepL API, see: https://www.deepl.com/docs-api/documents
12
+ """
13
+
14
# Display name -> DeepL `target_lang` code.
# Insertion order is kept as-is because the keys are listed directly in the
# "Target Language" dropdown of the UI.
DEEPL_AVAILABLE_TARGET_LANGS = {
    "Bulgarian": "BG",
    "Czech": "CS",
    "Danish": "DA",
    "German": "DE",
    "Greek": "EL",
    "English": "EN",
    "English (British)": "EN-GB",
    "English (American)": "EN-US",
    "Spanish": "ES",
    "Estonian": "ET",
    "Finnish": "FI",
    "French": "FR",
    "Hungarian": "HU",
    "Indonesian": "ID",
    "Italian": "IT",
    "Japanese": "JA",
    "Korean": "KO",
    "Lithuanian": "LT",
    "Latvian": "LV",
    "Norwegian (Bokmål)": "NB",
    "Dutch": "NL",
    "Polish": "PL",
    "Portuguese": "PT",
    "Portuguese (Brazilian)": "PT-BR",
    "Portuguese (all Portuguese varieties excluding Brazilian Portuguese)": "PT-PT",
    "Romanian": "RO",
    "Russian": "RU",
    "Slovak": "SK",
    "Slovenian": "SL",
    "Swedish": "SV",
    "Turkish": "TR",
    "Ukrainian": "UK",
    "Chinese (simplified)": "ZH",
}
49
+
50
# Display name -> DeepL `source_lang` code.
# "Automatic Detection" maps to None, meaning no explicit source language is
# supplied with the request. Insertion order is kept as-is because the keys
# are listed directly in the "Source Language" dropdown of the UI.
DEEPL_AVAILABLE_SOURCE_LANGS = {
    "Automatic Detection": None,
    "Bulgarian": "BG",
    "Czech": "CS",
    "Danish": "DA",
    "German": "DE",
    "Greek": "EL",
    "English": "EN",
    "Spanish": "ES",
    "Estonian": "ET",
    "Finnish": "FI",
    "French": "FR",
    "Hungarian": "HU",
    "Indonesian": "ID",
    "Italian": "IT",
    "Japanese": "JA",
    "Korean": "KO",
    "Lithuanian": "LT",
    "Latvian": "LV",
    "Norwegian (Bokmål)": "NB",
    "Dutch": "NL",
    "Polish": "PL",
    "Portuguese (all Portuguese varieties mixed)": "PT",
    "Romanian": "RO",
    "Russian": "RU",
    "Slovak": "SK",
    "Slovenian": "SL",
    "Swedish": "SV",
    "Turkish": "TR",
    "Ukrainian": "UK",
    "Chinese": "ZH",
}
82
+
83
+
84
class DeepLAPI:
    """Translate subtitle files (.srt / .vtt) through the DeepL REST API."""

    # Seconds to wait for the DeepL server before giving up on a request, so a
    # stalled connection cannot hang the UI callback forever.
    REQUEST_TIMEOUT = 60

    def __init__(self):
        # Pause (seconds) inserted after every API request.
        self.api_interval = 1
        # Maximum number of subtitle sentences sent in a single request.
        self.max_text_batch_size = 50
        self.available_target_langs = DEEPL_AVAILABLE_TARGET_LANGS
        self.available_source_langs = DEEPL_AVAILABLE_SOURCE_LANGS

    def translate_deepl(self,
                        auth_key: str,
                        fileobjs: list,
                        source_lang: str,
                        target_lang: str,
                        is_pro: bool,
                        progress=gr.Progress()) -> list:
        """
        Translate subtitle files using DeepL API

        Parameters
        ----------
        auth_key: str
            API Key for DeepL from gr.Textbox()
        fileobjs: list
            List of subtitle files to translate from gr.Files()
        source_lang: str
            Source language name of the files to translate from gr.Dropdown()
        target_lang: str
            Target language name of the files to translate from gr.Dropdown()
        is_pro: bool
            Whether the key belongs to a DeepL Pro account, from gr.Checkbox().
        progress: gr.Progress
            Indicator to show progress directly in gradio.

        Returns
        ----------
        A List of
        String to return to gr.Textbox()
        Files (list of written output paths) to return to gr.Files()
        """
        # Extension -> (parser, serializer). One table replaces the two
        # copy-pasted branches and keeps the output extension tied to the
        # input format (the .vtt branch used to write ".srt" files).
        handlers = {
            ".srt": (parse_srt, get_serialized_srt),
            ".vtt": (parse_vtt, get_serialized_vtt),
        }

        output_dir = os.path.join("outputs", "translations")
        os.makedirs(output_dir, exist_ok=True)

        files_info = {}
        output_paths = []
        # `fileobjs` may be None when nothing was uploaded.
        for fileobj in fileobjs or []:
            # Accept both file-like objects exposing `.name` and plain paths.
            file_path = getattr(fileobj, "name", fileobj)
            file_name, file_ext = os.path.splitext(os.path.basename(file_path))
            file_ext = file_ext.lower()

            if file_ext not in handlers:
                # Unsupported format: skip instead of reusing a stale/undefined
                # `subtitle` from a previous iteration.
                continue
            parse, serialize = handlers[file_ext]

            parsed_dicts = parse(file_path=file_path)
            self._translate_entries(auth_key, parsed_dicts, source_lang, target_lang, is_pro, progress)

            subtitle = serialize(parsed_dicts)
            timestamp = datetime.now().strftime("%m%d%H%M%S")

            # NOTE(review): drops the last 9 characters of the stem, kept from
            # the original code — presumably a suffix appended to uploaded
            # file names; confirm against the gradio version in use.
            file_name = file_name[:-9]
            output_path = os.path.join(output_dir, f"{file_name}-{timestamp}{file_ext}")
            write_file(subtitle, output_path)

            files_info[file_name] = subtitle
            output_paths.append(output_path)

        total_result = ''
        for file_name, subtitle in files_info.items():
            total_result += '------------------------------------\n'
            total_result += f'{file_name}\n\n'
            total_result += f'{subtitle}'

        gr_str = f"Done! Subtitle is in the outputs/translation folder.\n\n{total_result}"
        # Return every written file, not only the last one, so that all
        # translated subtitles are downloadable from the UI.
        return [gr_str, output_paths]

    def _translate_entries(self,
                           auth_key: str,
                           parsed_dicts: list,
                           source_lang: str,
                           target_lang: str,
                           is_pro: bool,
                           progress) -> None:
        """Translate the "sentence" field of every parsed subtitle entry in place, batch by batch."""
        total = len(parsed_dicts)
        batch_size = self.max_text_batch_size
        for batch_start in range(0, total, batch_size):
            batch_end = min(batch_start + batch_size, total)
            sentences_to_translate = [dic["sentence"] for dic in parsed_dicts[batch_start:batch_end]]
            translated_texts = self.request_deepl_translate(auth_key, sentences_to_translate, source_lang,
                                                            target_lang, is_pro)
            for offset, translated_text in enumerate(translated_texts):
                parsed_dicts[batch_start + offset]["sentence"] = translated_text["text"]
            progress(batch_end / total, desc="Translating..")

    def request_deepl_translate(self,
                                auth_key: str,
                                text: list,
                                source_lang: str,
                                target_lang: str,
                                is_pro: bool):
        """
        Request API response to DeepL server

        Parameters
        ----------
        auth_key: str
            API Key for DeepL
        text: list
            Sentences to translate in one request
        source_lang: str
            Key of DEEPL_AVAILABLE_SOURCE_LANGS
        target_lang: str
            Key of DEEPL_AVAILABLE_TARGET_LANGS
        is_pro: bool
            Selects the Pro endpoint when True, the free endpoint otherwise.

        Returns
        ----------
        The "translations" list of the JSON response; each item is read by the
        caller through its "text" key.

        Raises
        ----------
        requests.HTTPError
            When the server answers with an error status (for example an
            invalid key), instead of failing later on a missing JSON key.
        """
        url = 'https://api.deepl.com/v2/translate' if is_pro else 'https://api-free.deepl.com/v2/translate'
        headers = {
            'Authorization': f'DeepL-Auth-Key {auth_key}'
        }
        data = {
            'text': text,
            'target_lang': DEEPL_AVAILABLE_TARGET_LANGS[target_lang]
        }
        # "Automatic Detection" maps to None: leave the field out entirely.
        source_code = DEEPL_AVAILABLE_SOURCE_LANGS[source_lang]
        if source_code is not None:
            data['source_lang'] = source_code

        response = requests.post(url, headers=headers, data=data, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        translations = response.json()["translations"]
        # Pause between consecutive requests.
        time.sleep(self.api_interval)
        return translations