Add support for the Whisper model large-v3-turbo.
Browse files- app.py +0 -2
- config.json5 +4 -0
- docs/options.md +1 -0
- src/translation/translationModel.py +10 -0
- src/whisper/fasterWhisperContainer.py +3 -1
app.py
CHANGED
@@ -56,8 +56,6 @@ MAX_FILE_PREFIX_LENGTH = 17
|
|
56 |
# Limit auto_parallel to a certain number of CPUs (specify vad_cpu_cores to get a higher number)
|
57 |
MAX_AUTO_CPU_CORES = 8
|
58 |
|
59 |
-
WHISPER_MODELS = ["tiny", "base", "small", "medium", "large", "large-v1", "large-v2", "large-v3"]
|
60 |
-
|
61 |
class VadOptions:
|
62 |
def __init__(self, vad: str = None, vadMergeWindow: float = 5, vadMaxMergeSize: float = 150, vadPadding: float = 1, vadPromptWindow: float = 1,
|
63 |
vadInitialPromptMode: Union[VadInitialPromptMode, str] = VadInitialPromptMode.PREPREND_FIRST_SEGMENT):
|
|
|
56 |
# Limit auto_parallel to a certain number of CPUs (specify vad_cpu_cores to get a higher number)
|
57 |
MAX_AUTO_CPU_CORES = 8
|
58 |
|
|
|
|
|
59 |
class VadOptions:
|
60 |
def __init__(self, vad: str = None, vadMergeWindow: float = 5, vadMaxMergeSize: float = 150, vadPadding: float = 1, vadPromptWindow: float = 1,
|
61 |
vadInitialPromptMode: Union[VadInitialPromptMode, str] = VadInitialPromptMode.PREPREND_FIRST_SEGMENT):
|
config.json5
CHANGED
@@ -34,6 +34,10 @@
|
|
34 |
{
|
35 |
"name": "large-v3",
|
36 |
"url": "large-v3"
|
|
|
|
|
|
|
|
|
37 |
}
|
38 |
// Uncomment to add custom Japanese models
|
39 |
//{
|
|
|
34 |
{
|
35 |
"name": "large-v3",
|
36 |
"url": "large-v3"
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"name": "large-v3-turbo",
|
40 |
+
"url": "large-v3-turbo"
|
41 |
}
|
42 |
// Uncomment to add custom Japanese models
|
43 |
//{
|
docs/options.md
CHANGED
@@ -17,6 +17,7 @@ Select the model that Whisper will use to transcribe the audio:
|
|
17 |
| large | 1550 M | N/A | large | ~10 GB | 1x |
|
18 |
| large-v2 | 1550 M | N/A | large | ~10 GB | 1x |
|
19 |
| large-v3 | 1550 M | N/A | large | ~10 GB | 1x |
|
|
|
20 |
|
21 |
## Language
|
22 |
|
|
|
17 |
| large | 1550 M | N/A | large | ~10 GB | 1x |
|
18 |
| large-v2 | 1550 M | N/A | large | ~10 GB | 1x |
|
19 |
| large-v3 | 1550 M | N/A | large | ~10 GB | 1x |
|
20 |
+
| large-v3-turbo | 809 M | N/A | turbo | ~6 GB | 8x |
|
21 |
|
22 |
## Language
|
23 |
|
src/translation/translationModel.py
CHANGED
@@ -423,6 +423,16 @@ class TranslationModel:
|
|
423 |
else: #M2M100 & NLLB
|
424 |
output = self.transTranslator(text, max_length=max_length, batch_size=self.batchSize, no_repeat_ngram_size=self.noRepeatNgramSize, num_beams=self.numBeams)
|
425 |
result = output[0]['translation_text']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
426 |
except Exception as e:
|
427 |
print(traceback.format_exc())
|
428 |
print("Error translation text: " + str(e))
|
|
|
423 |
else: #M2M100 & NLLB
|
424 |
output = self.transTranslator(text, max_length=max_length, batch_size=self.batchSize, no_repeat_ngram_size=self.noRepeatNgramSize, num_beams=self.numBeams)
|
425 |
result = output[0]['translation_text']
|
426 |
+
|
427 |
+
if len(result) > 2:
|
428 |
+
if result[len(result) - 1] == "\"" and result[0] == "\"":
|
429 |
+
result = result[1:-1]
|
430 |
+
elif result[len(result) - 1] == "'" and result[0] == "'":
|
431 |
+
result = result[1:-1]
|
432 |
+
elif result[len(result) - 1] == "」" and result[0] == "「":
|
433 |
+
result = result[1:-1]
|
434 |
+
elif result[len(result) - 1] == "』" and result[0] == "『":
|
435 |
+
result = result[1:-1]
|
436 |
except Exception as e:
|
437 |
print(traceback.format_exc())
|
438 |
print("Error translation text: " + str(e))
|
src/whisper/fasterWhisperContainer.py
CHANGED
@@ -42,11 +42,13 @@ class FasterWhisperContainer(AbstractWhisperContainer):
|
|
42 |
model_url = model_config.url
|
43 |
|
44 |
if model_config.type == "whisper":
|
45 |
-
if model_url not in ["tiny", "base", "small", "medium", "large", "large-v1", "large-v2", "large-v3"]:
|
46 |
raise Exception("FasterWhisperContainer does not yet support Whisper models. Use ct2-transformers-converter to convert the model to a faster-whisper model.")
|
47 |
if model_url == "large":
|
48 |
# large is an alias for large-v1
|
49 |
model_url = "large-v1"
|
|
|
|
|
50 |
|
51 |
device = self.device
|
52 |
|
|
|
42 |
model_url = model_config.url
|
43 |
|
44 |
if model_config.type == "whisper":
|
45 |
+
if model_url not in ["tiny", "base", "small", "medium", "large", "large-v1", "large-v2", "large-v3", "large-v3-turbo"]:
|
46 |
raise Exception("FasterWhisperContainer does not yet support Whisper models. Use ct2-transformers-converter to convert the model to a faster-whisper model.")
|
47 |
if model_url == "large":
|
48 |
# large is an alias for large-v1
|
49 |
model_url = "large-v1"
|
50 |
+
elif model_url == "large-v3-turbo":
|
51 |
+
model_url = "deepdml/faster-whisper-large-v3-turbo-ct2"
|
52 |
|
53 |
device = self.device
|
54 |
|