Mahiruoshi committed on
Commit 671ec44
Parent: 562810d

Update app.py

Files changed (1):
  app.py +209 -727
app.py CHANGED
@@ -24,6 +24,8 @@ import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

import gradio as gr

@@ -40,33 +42,7 @@ from models import SynthesizerTrn
from text.symbols import symbols
import sys
import re
-
- import random
- import hashlib
-
- from fugashi import Tagger
- import jaconv
- import unidic
- import subprocess
-
- import requests
-
- from ebooklib import epub
- import PyPDF2
- from PyPDF2 import PdfReader
- from bs4 import BeautifulSoup
- import jieba
- import romajitable
-
- webBase = {
-     'pyopenjtalk-V2.3-Katakana': 'https://mahiruoshi-mygo-vits-bert.hf.space/',
-     'fugashi-V2.3-Katakana': 'https://mahiruoshi-mygo-vits-bert.hf.space/',
- }
-
- languages = ["Auto", "ZH", "JP"]
- modelPaths = []
- modes = ['pyopenjtalk-V2.3', 'pyopenjtalk-V2.3-Katakana']
- sentence_modes = ['sentence', 'paragraph']

net_g = None

@@ -93,355 +69,6 @@ BandList = {
    "AveMujica":["祥子","睦","海鈴","にゃむ","初華"],
}

- SchoolLilst = {
-     "圣翔音乐学园":["華戀","光","香子","雙葉","真晝","純那","克洛迪娜","真矢","奈奈"],
-     "凛明馆女子学校":["珠緒","壘","文","悠悠子","一愛"],
-     "弗隆提亚艺术学校":["艾露","艾露露","菈樂菲","司","靜羽"],
-     "西克菲尔特音乐学院":["晶","未知留","八千代","栞","美帆"]
- }
-
- # Translation
-
- def translate(Sentence: str, to_Language: str = "jp", from_Language: str = ""):
-     """
-     :param Sentence: the sentence to translate
-     :param from_Language: language of the input sentence
-     :param to_Language: target language
-     :return: the translated sentence, or None on error
-
-     Common language codes: Chinese zh, English en, Japanese jp
-     """
-     appid = "20231117001883321"
-     key = "lMQbvZHeJveDceLof2wf"
-     if appid == "" or key == "":
-         return "请开发者在config.yml中配置app_key与secret_key"
-     url = "https://fanyi-api.baidu.com/api/trans/vip/translate"
-     texts = Sentence.splitlines()
-     outTexts = []
-     for t in texts:
-         if t != "":
-             # Signature calculation; see the docs at https://api.fanyi.baidu.com/product/113
-             salt = str(random.randint(1, 100000))
-             signString = appid + t + salt + key
-             hs = hashlib.md5()
-             hs.update(signString.encode("utf-8"))
-             signString = hs.hexdigest()
-             if from_Language == "":
-                 from_Language = "auto"
-             headers = {"Content-Type": "application/x-www-form-urlencoded"}
-             payload = {
-                 "q": t,
-                 "from": from_Language,
-                 "to": to_Language,
-                 "appid": appid,
-                 "salt": salt,
-                 "sign": signString,
-             }
-             # Send the request
-             try:
-                 response = requests.post(
-                     url=url, data=payload, headers=headers, timeout=3
-                 )
-                 response = response.json()
-                 if "trans_result" in response.keys():
-                     result = response["trans_result"][0]
-                     if "dst" in result.keys():
-                         dst = result["dst"]
-                         outTexts.append(dst)
-             except Exception:
-                 return Sentence
-         else:
-             outTexts.append(t)
-     return "\n".join(outTexts)
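The signing scheme the removed function implements is just an MD5 digest over appid + query + salt + secret key (per the Baidu Fanyi docs linked above). A minimal sketch with placeholder credentials; note the appid/key hard-coded in this file are live values that belong in config.yml, as the fallback message itself suggests:

```python
import hashlib
import random

def baidu_sign(appid: str, query: str, salt: str, secret_key: str) -> str:
    # sign = MD5(appid + q + salt + secret_key), lowercase hex
    return hashlib.md5((appid + query + salt + secret_key).encode("utf-8")).hexdigest()

salt = str(random.randint(1, 100000))
sign = baidu_sign("YOUR_APPID", "今天天气不错", salt, "YOUR_SECRET_KEY")
```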
-
- # Text-cleaning utilities
- def is_japanese(string):
-     for ch in string:
-         if ord(ch) > 0x3040 and ord(ch) < 0x30FF:
-             return True
-     return False
-
- def is_chinese(string):
-     for ch in string:
-         if '\u4e00' <= ch <= '\u9fff':
-             return True
-     return False
-
- def is_single_language(sentence):
-     # Check whether the sentence is in a single language
-     contains_chinese = re.search(r'[\u4e00-\u9fff]', sentence) is not None
-     contains_japanese = re.search(r'[\u3040-\u30ff\u31f0-\u31ff]', sentence) is not None
-     contains_english = re.search(r'[a-zA-Z]', sentence) is not None
-     language_count = sum([contains_chinese, contains_japanese, contains_english])
-     return language_count == 1
-
- def merge_scattered_parts(sentences):
-     """Merge scattered fragments into neighboring sentences, keeping each result monolingual"""
-     merged_sentences = []
-     buffer_sentence = ""
-
-     for sentence in sentences:
-         # Check whether it is monolingual, or too short (likely punctuation or a lone word)
-         if is_single_language(sentence) and len(sentence) > 1:
-             # If the buffer has content, flush it to the list first
-             if buffer_sentence:
-                 merged_sentences.append(buffer_sentence)
-                 buffer_sentence = ""
-             merged_sentences.append(sentence)
-         else:
-             # A scattered fragment: append it to the buffer
-             buffer_sentence += sentence
-
-     # Make sure any remaining buffer content is appended
-     if buffer_sentence:
-         merged_sentences.append(buffer_sentence)
-
-     return merged_sentences
-
- def is_only_punctuation(s):
-     """Check whether the string contains only punctuation"""
-     # Common Chinese, Japanese and English punctuation
-     punctuation_pattern = re.compile(r'^[\s。*;,:“”()、!?《》\u3000\.,;:"\'?!()]+$')
-     return punctuation_pattern.match(s) is not None
-
- def split_mixed_language(sentence):
-     # Split a mixed-language sentence:
-     # walk character by character and split off runs in different languages
-     sub_sentences = []
-     current_language = None
-     current_part = ""
-
-     for char in sentence:
-         if re.match(r'[\u4e00-\u9fff]', char):  # Chinese character
-             if current_language != 'chinese':
-                 if current_part:
-                     sub_sentences.append(current_part)
-                 current_part = char
-                 current_language = 'chinese'
-             else:
-                 current_part += char
-         elif re.match(r'[\u3040-\u30ff\u31f0-\u31ff]', char):  # Japanese character
-             if current_language != 'japanese':
-                 if current_part:
-                     sub_sentences.append(current_part)
-                 current_part = char
-                 current_language = 'japanese'
-             else:
-                 current_part += char
-         elif re.match(r'[a-zA-Z]', char):  # English character
-             if current_language != 'english':
-                 if current_part:
-                     sub_sentences.append(current_part)
-                 current_part = char
-                 current_language = 'english'
-             else:
-                 current_part += char
-         else:
-             current_part += char  # Punctuation and other characters
-
-     if current_part:
-         sub_sentences.append(current_part)
-
-     return sub_sentences
-
- def replace_quotes(text):
-     # Replace Chinese/Japanese quotation marks with English quotes
-     text = re.sub(r'[“”‘’『』「」()()]', '"', text)
-     return text
-
- def remove_numeric_annotations(text):
-     # Regex for numeric annotations:
-     # numbers wrapped in “”, 【】 or 〔〕
-     pattern = r'“\d+”|【\d+】|〔\d+〕'
-     # Strip those annotations
-     cleaned_text = re.sub(pattern, '', text)
-     return cleaned_text
-
- def merge_adjacent_japanese(sentences):
-     """Merge adjacent sentences that are both Japanese-only"""
-     merged_sentences = []
-     i = 0
-     while i < len(sentences):
-         current_sentence = sentences[i]
-         if i + 1 < len(sentences) and is_japanese(current_sentence) and is_japanese(sentences[i + 1]):
-             # The current and next sentences are both Japanese: merge them
-             while i + 1 < len(sentences) and is_japanese(sentences[i + 1]):
-                 current_sentence += sentences[i + 1]
-                 i += 1
-         merged_sentences.append(current_sentence)
-         i += 1
-     return merged_sentences
-
- def extrac(text):
-     text = replace_quotes(remove_numeric_annotations(text))  # normalize quotes
-     text = re.sub("<[^>]*>", "", text)  # strip HTML tags
-     # First pass: split on newlines and sentence-final punctuation
-     preliminary_sentences = re.split(r'([\n。;!?\.\?!])', text)
-     final_sentences = []
-
-     preliminary_sentences = re.split(r'([\n。;!?\.\?!])', text)
-
-     for piece in preliminary_sentences:
-         if is_single_language(piece):
-             final_sentences.append(piece)
-         else:
-             sub_sentences = split_mixed_language(piece)
-             final_sentences.extend(sub_sentences)
-
-     # Break up long sentences, using jieba for word segmentation
-     split_sentences = []
-     for sentence in final_sentences:
-         split_sentences.extend(split_long_sentences(sentence))
-
-     # Merge adjacent Japanese sentences
-     merged_japanese_sentences = merge_adjacent_japanese(split_sentences)
-
-     # Drop elements that are punctuation only
-     clean_sentences = [s for s in merged_japanese_sentences if not is_only_punctuation(s)]
-
-     # Drop empty strings and strip leftover quotes
-     return [s.replace('"','').strip() for s in clean_sentences if s]
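Roughly, the removed extrac() pipeline normalizes quotes, splits on sentence-final punctuation, separates mixed-language runs, and drops punctuation-only fragments. A quick sketch of the expected behavior (illustrative input; exact segmentation depends on the helpers above):

```python
sample = "今日はライブの日だ!我们一起去看演出吧。Nice to meet you!"
for segment in extrac(sample):
    print(segment)
# roughly: "今日はライブの日だ" / "我们一起去看演出吧" / "Nice to meet you"
# i.e. one language per segment, punctuation-only pieces removed
```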
-
-
-
- # Drop empty strings
-
- def is_mixed_language(sentence):
-     contains_chinese = re.search(r'[\u4e00-\u9fff]', sentence) is not None
-     contains_japanese = re.search(r'[\u3040-\u30ff\u31f0-\u31ff]', sentence) is not None
-     contains_english = re.search(r'[a-zA-Z]', sentence) is not None
-     languages_count = sum([contains_chinese, contains_japanese, contains_english])
-     return languages_count > 1
-
- def split_mixed_language(sentence):
-     # Split a mixed-language sentence; this second definition shadows the character-walk version above
-     sub_sentences = re.split(r'(?<=[。!?\.\?!])(?=")|(?<=")(?=[\u4e00-\u9fff\u3040-\u30ff\u31f0-\u31ff]|[a-zA-Z])', sentence)
-     return [s.strip() for s in sub_sentences if s.strip()]
-
- def seconds_to_ass_time(seconds):
-     """Convert seconds to ASS timestamp format"""
-     hours = int(seconds / 3600)
-     minutes = int((seconds % 3600) / 60)
-     seconds = int(seconds) % 60
-     milliseconds = int((seconds - int(seconds)) * 1000)
-     return "{:01d}:{:02d}:{:02d}.{:02d}".format(hours, minutes, seconds, int(milliseconds / 10))
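Note a bug in the removed seconds_to_ass_time: `seconds` is overwritten with an int before the fractional part is read, so the millisecond term is always 0 (the format string actually prints centiseconds). A corrected sketch that preserves the fraction:

```python
def seconds_to_ass_time_fixed(seconds: float) -> str:
    # ASS timestamps look like H:MM:SS.CC (centiseconds)
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    secs = int(seconds % 60)
    centiseconds = int((seconds - int(seconds)) * 100)
    return "{:01d}:{:02d}:{:02d}.{:02d}".format(hours, minutes, secs, centiseconds)

assert seconds_to_ass_time_fixed(3725.25) == "1:02:05.25"
```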
-
- def extract_text_from_epub(file_path):
-     book = epub.read_epub(file_path)
-     content = []
-     for item in book.items:
-         if isinstance(item, epub.EpubHtml):
-             soup = BeautifulSoup(item.content, 'html.parser')
-             content.append(soup.get_text())
-     return '\n'.join(content)
-
- def extract_text_from_pdf(file_path):
-     with open(file_path, 'rb') as file:
-         reader = PdfReader(file)
-         content = [page.extract_text() for page in reader.pages]
-     return '\n'.join(content)
-
- def remove_annotations(text):
-     # Strip bracketed annotations: [], <> and CJK brackets
-     text = re.sub(r'\[.*?\]', '', text)
-     text = re.sub(r'\<.*?\>', '', text)
-     text = re.sub(r'&#8203;``【oaicite:1】``&#8203;', '', text)
-     return text
-
- def extract_text_from_file(inputFile):
-     file_extension = os.path.splitext(inputFile)[1].lower()
-     if file_extension == ".epub":
-         return extract_text_from_epub(inputFile)
-     elif file_extension == ".pdf":
-         return extract_text_from_pdf(inputFile)
-     elif file_extension == ".txt":
-         with open(inputFile, 'r', encoding='utf-8') as f:
-             return f.read()
-     else:
-         raise ValueError(f"Unsupported file format: {file_extension}")
-
- def split_by_punctuation(sentence):
-     """Split a sentence on Chinese secondary punctuation"""
-     # Common secondary separators: commas, semicolons, etc.
-     parts = re.split(r'([,,;;])', sentence)
-     # Merge each punctuation mark into the preceding part so it never stands alone
-     merged_parts = []
-     for part in parts:
-         if part and not part in ',,;;':
-             merged_parts.append(part)
-         elif merged_parts:
-             merged_parts[-1] += part
-     return merged_parts
-
- def split_long_sentences(sentence, max_length=30):
-     """If a Chinese sentence is too long, split on punctuation first, falling back to jieba word segmentation"""
-     if len(sentence) > max_length and is_chinese(sentence):
-         # First try splitting on secondary punctuation
-         preliminary_parts = split_by_punctuation(sentence)
-         new_sentences = []
-
-         for part in preliminary_parts:
-             # If a part is still too long, segment it with jieba
-             if len(part) > max_length:
-                 words = jieba.lcut(part)
-                 current_sentence = ""
-                 for word in words:
-                     if len(current_sentence) + len(word) > max_length:
-                         new_sentences.append(current_sentence)
-                         current_sentence = word
-                     else:
-                         current_sentence += word
-                 if current_sentence:
-                     new_sentences.append(current_sentence)
-             else:
-                 new_sentences.append(part)
-
-         return new_sentences
-     return [sentence]  # Not long, or not Chinese: return unchanged
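The greedy word-packing above never splits a jieba token, so a chunk can only exceed max_length if a single word does. Exercising it (assuming jieba is installed and the helpers above are in scope):

```python
import jieba  # used internally by split_long_sentences

long_zh = "在这个例子里我们把一段相当长的中文句子交给分句工具来演示按词语边界重新打包的效果" * 2
for chunk in split_long_sentences(long_zh, max_length=30):
    print(len(chunk), chunk)  # each chunk stays at or under 30 characters
```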
-
- def extract_and_convert(text):
-
-     # Find all English words with a regex
-     english_parts = re.findall(r'\b[A-Za-z]+\b', text)  # \b marks a word boundary
-
-     # Convert each English word to katakana
-     kana_parts = ['\n{}\n'.format(romajitable.to_kana(word).katakana) for word in english_parts]
-
-     # Replace the English parts in the original text
-     for eng, kana in zip(english_parts, kana_parts):
-         text = text.replace(eng, kana, 1)  # replace only one occurrence at a time
-
-     return text
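romajitable (used above) maps romaji letter sequences to kana; per the call in the removed code, `to_kana(word).katakana` is the reading, which is wrapped in newlines so the Japanese frontend treats it as its own segment. A usage sketch, assuming the package is installed; real English spelling converts only approximately, since the mapping is letter-based rather than phonetic:

```python
import romajitable

for word in ["hello", "bang", "dream"]:
    kana = romajitable.to_kana(word).katakana  # naive letter-to-kana mapping
    print(word, "->", kana)
```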
- # Inference utilities
- def download_unidic():
-     try:
-         Tagger()
-         print("Tagger launch successfully.")
-     except Exception as e:
-         print("UNIDIC dictionary not found, downloading...")
-         subprocess.run([sys.executable, "-m", "unidic", "download"])
-         print("Download completed.")
-
- def kanji_to_hiragana(text):
-     global tagger
-     output = ""
-
-     # Regex tuned to separate text from punctuation more accurately
-     segments = re.findall(r'[一-龥ぁ-んァ-ン\w]+|[^\一-龥ぁ-んァ-ン\w\s]', text, re.UNICODE)
-
-     for segment in segments:
-         if re.match(r'[一-龥ぁ-んァ-ン\w]+', segment):
-             # Words and kanji: convert to hiragana
-             for word in tagger(segment):
-                 kana = word.feature.kana or word.surface
-                 hiragana = jaconv.kata2hira(kana)  # katakana -> hiragana
-                 output += hiragana
-         else:
-             # Punctuation: leave unchanged
-             output += segment
-
-     return output
-
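The removed reading pipeline leans on fugashi's UniDic features: `word.feature.kana` carries a katakana reading, which jaconv then lowers to hiragana. A self-contained sketch, assuming fugashi plus an installed UniDic dictionary (which download_unidic() above arranges):

```python
from fugashi import Tagger
import jaconv

tagger = Tagger()  # raises if no UniDic dictionary is installed

def to_hiragana(text: str) -> str:
    pieces = []
    for word in tagger(text):
        kana = word.feature.kana or word.surface  # fall back to the surface form
        pieces.append(jaconv.kata2hira(kana))
    return "".join(pieces)

print(to_hiragana("漢字は難しい"))  # e.g. かんじはむずかしい
```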
def get_net_g(model_path: str, device: str, hps):
    net_g = SynthesizerTrn(
        len(symbols),
@@ -496,6 +123,7 @@ def get_text(text, language_str, hps, device, style_text=None, style_weight=0.7)
    language = torch.LongTensor(language)
    return bert, ja_bert, en_bert, phone, tone, language

def infer(
    text,
    sdp_ratio,
@@ -506,22 +134,9 @@ def infer(
    style_text=None,
    style_weight=0.7,
    language = "Auto",
-     mode = 'pyopenjtalk-V2.3',
-     skip_start=False,
-     skip_end=False,
):
-     if style_text == None:
-         style_text = ""
-         style_weight=0,
-     if mode == 'fugashi-V2.3':
-         text = kanji_to_hiragana(text) if is_japanese(text) else text
-     if language == "JP":
-         text = translate(text,"jp")
-     if language == "ZH":
-         text = translate(text,"zh")
    if language == "Auto":
        language= 'JP' if is_japanese(text) else 'ZH'
-     #print(f'{text}:{sdp_ratio}:{noise_scale}:{noise_scale_w}:{length_scale}:{length_scale}:{sid}:{language}:{mode}:{skip_start}:{skip_end}')
    bert, ja_bert, en_bert, phones, tones, lang_ids = get_text(
        text,
        language,
@@ -530,20 +145,6 @@ def infer(
        style_text=style_text,
        style_weight=style_weight,
    )
-     if skip_start:
-         phones = phones[3:]
-         tones = tones[3:]
-         lang_ids = lang_ids[3:]
-         bert = bert[:, 3:]
-         ja_bert = ja_bert[:, 3:]
-         en_bert = en_bert[:, 3:]
-     if skip_end:
-         phones = phones[:-2]
-         tones = tones[:-2]
-         lang_ids = lang_ids[:-2]
-         bert = bert[:, :-2]
-         ja_bert = ja_bert[:, :-2]
-         en_bert = en_bert[:, :-2]
    with torch.no_grad():
        x_tst = phones.to(device).unsqueeze(0)
        tones = tones.to(device).unsqueeze(0)
@@ -586,106 +187,95 @@ def infer(
    ) # , emo
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
-     print("Success.")
-     return audio

def loadmodel(model):
    _ = net_g.eval()
    _ = utils.load_checkpoint(model, net_g, None, skip_optimizer=True)
    return "success"

- def generate_audio_and_srt_for_group(
-     group,
-     outputPath,
-     group_index,
-     sampling_rate,
-     speaker,
-     sdp_ratio,
-     noise_scale,
-     noise_scale_w,
-     length_scale,
-     speakerList,
-     silenceTime,
-     language,
-     mode,
-     skip_start,
-     skip_end,
-     style_text,
-     style_weight,
- ):
    audio_fin = []
    ass_entries = []
    start_time = 0
    #speaker = random.choice(cara_list)
    ass_header = """[Script Info]
- ; 我没意见
- Title: Audiobook
- ScriptType: v4.00+
- WrapStyle: 0
- PlayResX: 640
- PlayResY: 360
- ScaledBorderAndShadow: yes
- [V4+ Styles]
- Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
- Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,1,1,2,10,10,10,1
- [Events]
- Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
- """

    for sentence in group:
        try:
-             if len(sentence) > 1:
-                 FakeSpeaker = sentence.split("|")[0]
-                 print(FakeSpeaker)
-                 SpeakersList = re.split('\n', speakerList)
-                 if FakeSpeaker in list(hps.data.spk2id.keys()):
-                     speaker = FakeSpeaker
-                 for i in SpeakersList:
-                     if FakeSpeaker == i.split("|")[1]:
-                         speaker = i.split("|")[0]
-             if sentence != '\n':
-                 text = (remove_annotations(sentence.split("|")[-1]).replace(" ","")+"。").replace(",。","。")
-                 if mode == 'pyopenjtalk-V2.3' or mode == 'fugashi-V2.3':
-                     #print(f'{text}:{sdp_ratio}:{noise_scale}:{noise_scale_w}:{length_scale}:{length_scale}:{speaker}:{language}:{mode}:{skip_start}:{skip_end}')
-                     audio = infer(
-                         text,
-                         sdp_ratio,
-                         noise_scale,
-                         noise_scale_w,
-                         length_scale,
-                         speaker,
-                         style_text,
-                         style_weight,
-                         language,
-                         mode,
-                         skip_start,
-                         skip_end,
-                     )
-                 silence_frames = int(silenceTime * 44010) if is_chinese(sentence) else int(silenceTime * 44010)
-                 silence_data = np.zeros((silence_frames,), dtype=audio.dtype)
-                 audio_fin.append(audio)
-                 audio_fin.append(silence_data)
-                 duration = len(audio) / sampling_rate
-                 print(duration)
-                 end_time = start_time + duration + silenceTime
-                 ass_entries.append("Dialogue: 0,{},{},".format(seconds_to_ass_time(start_time), seconds_to_ass_time(end_time)) + "Default,,0,0,0,,{}".format(sentence.replace("|",":")))
-                 start_time = end_time
        except:
            pass
    wav_filename = os.path.join(outputPath, f'audiobook_part_{group_index}.wav')
    ass_filename = os.path.join(outputPath, f'audiobook_part_{group_index}.ass')
-     write(wav_filename, sampling_rate, gr.processing_utils.convert_to_16_bit_wav(np.concatenate(audio_fin)))

    with open(ass_filename, 'w', encoding='utf-8') as f:
        f.write(ass_header + '\n'.join(ass_entries))
-     return (hps.data.sampling_rate, gr.processing_utils.convert_to_16_bit_wav(np.concatenate(audio_fin)))

- def generate_audio(
-     inputFile,
-     groupSize,
-     filepath,
-     silenceTime,
-     speakerList,
    text,
    sdp_ratio,
    noise_scale,
@@ -694,100 +284,65 @@ def generate_audio(
    sid,
    style_text=None,
    style_weight=0.7,
-     language = "Auto",
-     mode = 'pyopenjtalk-V2.3',
-     sentence_mode = 'sentence',
-     skip_start=False,
-     skip_end=False,
):
-     if inputFile:
-         text = extract_text_from_file(inputFile.name)
-         sentence_mode = 'paragraph'
-     if mode == 'pyopenjtalk-V2.3' or mode == 'fugashi-V2.3':
-         if sentence_mode == 'sentence':
-             audio = infer(
-                 text,
-                 sdp_ratio,
-                 noise_scale,
-                 noise_scale_w,
-                 length_scale,
-                 sid,
-                 style_text,
-                 style_weight,
-                 language,
-                 mode,
-                 skip_start,
-                 skip_end,
-             )
-             return (hps.data.sampling_rate,gr.processing_utils.convert_to_16_bit_wav(audio))
-         if sentence_mode == 'paragraph':
-             GROUP_SIZE = groupSize
-             directory_path = filepath if torch.cuda.is_available() else "books"
-             if os.path.exists(directory_path):
-                 shutil.rmtree(directory_path)
-             os.makedirs(directory_path)
-             if language == 'Auto':
-                 sentences = extrac(extract_and_convert(text))
-             else:
-                 sentences = extrac(text)
-             for i in range(0, len(sentences), GROUP_SIZE):
-                 group = sentences[i:i+GROUP_SIZE]
-                 if speakerList == "":
-                     speakerList = "无"
-                 result = generate_audio_and_srt_for_group(
-                     group,
-                     directory_path,
-                     i//GROUP_SIZE + 1,
-                     44100,
-                     sid,
-                     sdp_ratio,
-                     noise_scale,
-                     noise_scale_w,
-                     length_scale,
-                     speakerList,
-                     silenceTime,
-                     language,
-                     mode,
-                     skip_start,
-                     skip_end,
-                     style_text,
-                     style_weight,
-                 )
-                 if not torch.cuda.is_available():
-                     return result
-             return result
-     #url = f'{webBase[mode]}?text={text}&speaker={sid}&sdp_ratio={sdp_ratio}&noise_scale={noise_scale}&noise_scale_w={noise_scale_w}&length_scale={length_scale}&language={language}&skip_start={skip_start}&skip_end={skip_end}'
-     #print(url)
-     #res = requests.get(url)
-     # switched to POST instead
-     res = requests.post(webBase[mode], json = {
-         "groupSize": groupSize,
-         "filepath": filepath,
-         "silenceTime": silenceTime,
-         "speakerList": speakerList,
-         "text": text,
-         "speaker": sid,
-         "sdp_ratio": sdp_ratio,
-         "noise_scale": noise_scale,
-         "noise_scale_w": noise_scale_w,
-         "length_scale": length_scale,
-         "language": language,
-         "skip_start": skip_start,
-         "skip_end": skip_end,
-         "mode": mode,
-         "sentence_mode": sentence_mode,
-         "style_text": style_text,
-         "style_weight": style_weight
-     })
-     audio = res.content
-     with open('output.wav', 'wb') as code:
-         code.write(audio)
-     file_path = "output.wav"
-     return file_path

if __name__ == "__main__":
-     #download_unidic()
-     #tagger = Tagger()
    for dirpath, dirnames, filenames in os.walk('Data/BangDream/models/'):
        for filename in filenames:
            modelPaths.append(os.path.join(dirpath, filename))
@@ -800,6 +355,7 @@ if __name__ == "__main__":
    with gr.Blocks() as app:
        gr.Markdown(value="""
([Bert-Vits2](https://github.com/Stardust-minus/Bert-VITS2) V2.3)少歌邦邦全员在线语音合成\n
[好玩的](http://love.soyorin.top/)\n
该界面的真实链接(国内可用): https://mahiruoshi-bangdream-bert-vits2.hf.space/\n
API: https://mahiruoshi-bert-vits2-api.hf.space/ \n
@@ -821,169 +377,36 @@
                                        f'<img style="width:auto;height:400px;" src="https://mahiruoshi-bangdream-bert-vits2.hf.space/file/image/{name}.png">'
                                        '</div>'
                                    )
-                                 with gr.Accordion(label="参数设定", open=False):
-                                     sdp_ratio = gr.Slider(
-                                         minimum=0, maximum=1, value=0.5, step=0.01, label="SDP/DP混合比"
-                                     )
-                                     noise_scale = gr.Slider(
-                                         minimum=0.1, maximum=2, value=0.6, step=0.01, label="Noise:感情调节"
-                                     )
-                                     noise_scale_w = gr.Slider(
-                                         minimum=0.1, maximum=2, value=0.667, step=0.01, label="Noise_W:音素长度"
-                                     )
-                                     skip_start = gr.Checkbox(label="skip_start")
-                                     skip_end = gr.Checkbox(label="skip_end")
-                                     speaker = gr.Dropdown(
-                                         choices=[name], value=name, label="说话人"
-                                     )
                                length_scale = gr.Slider(
                                    minimum=0.1, maximum=2, value=1, step=0.01, label="语速调节"
                                )
                                language = gr.Dropdown(
-                                     choices=languages, value="Auto", label="语言选择,若不选自动则会将输入语言翻译为日语或中文"
-                                 )
-                                 mode = gr.Dropdown(
-                                     choices=["pyopenjtalk-V2.3"], value="pyopenjtalk-V2.3", label="TTS模式,合成少歌角色需要切换成 pyopenjtalk-V2.3-Katakana "
-                                 )
-                                 sentence_mode = gr.Dropdown(
-                                     choices=sentence_modes, value="sentence", label="文本合成模式"
-                                 )
-                                 with gr.Accordion(label="扩展选项", open=False):
-                                     inputFile = gr.UploadButton(label="txt文件输入")
-                                     speakerList = gr.TextArea(
-                                         label="角色对应表,如果你记不住角色名可以这样,左边是你想要在每一句话合成中用到的speaker(见角色清单)右边是你上传文本时分隔符左边设置的说话人:{ChoseSpeakerFromConfigList}|{SeakerInUploadText}",
-                                         value = "ましろ|真白\n七深|七深\n透子|透子\nつくし|筑紫\n瑠唯|瑠唯\nそよ|素世\n祥子|祥子",
-                                     )
-                                     groupSize = gr.Slider(
-                                         minimum=10, maximum=1000 if torch.cuda.is_available() else 50,value = 50, step=1, label="单个音频文件包含的最大句子数"
-                                     )
-                                     filepath = gr.TextArea(
-                                         label="本地合成时的音频存储文件夹(会清空文件夹,别把C盘删了)",
-                                         value = "D:/audiobook/book1",
-                                     )
-                                     silenceTime = gr.Slider(
-                                         minimum=0, maximum=1, value=0.5, step=0.01, label="句子的间隔"
-                                     )
-                                 modelstrs = gr.Dropdown(label = "模型", choices = modelPaths, value = modelPaths[0], type = "value")
-                                 btnMod = gr.Button("载入模型")
-                                 statusa = gr.TextArea(label = "模型加载状态")
-                                 btnMod.click(loadmodel, inputs=[modelstrs], outputs = [statusa])
-                             with gr.Column():
-                                 text = gr.TextArea(
-                                     label="文本输入,可用'|'分割说话人和文本,注意换行",
-                                     info="输入纯日语或者中文",
-                                     placeholder=f"{name}|你觉得你是职业歌手吗\n真白|我觉得我是",
-                                     value=f"私は{name}です。 "
-                                 )
-                                 style_text = gr.Textbox(
-                                     label="情感辅助文本",
-                                     info="语言保持跟主文本一致,文本可以参考训练集:https://huggingface.co/spaces/Mahiruoshi/BangDream-Bert-VITS2/blob/main/filelists/Mygo.list)",
-                                     placeholder="使用辅助文本的语意来辅助生成对话(语言保持与主文本相同)\n\n"
-                                     "**注意**:不要使用**指令式文本**(如:开心),要使用**带有强烈情感的文本**(如:我好快乐!!!)"
-                                 )
-                                 style_weight = gr.Slider(
-                                     minimum=0,
-                                     maximum=1,
-                                     value=0.7,
-                                     step=0.1,
-                                     label="Weight",
-                                     info="主文本和辅助文本的bert混合比率,0表示仅主文本,1表示仅辅助文本",
-                                 )
-                                 btn = gr.Button("点击生成", variant="primary")
-                                 audio_output = gr.Audio(label="Output Audio")
-                                 btntran = gr.Button("快速中翻日")
-                                 translateResult = gr.TextArea(label="使用百度翻译",placeholder="从这里复制翻译后的文本")
-                                 btntran.click(translate, inputs=[text], outputs = [translateResult])
-                                 btn.click(
-                                     generate_audio,
-                                     inputs=[
-                                         inputFile,
-                                         groupSize,
-                                         filepath,
-                                         silenceTime,
-                                         speakerList,
-                                         text,
-                                         sdp_ratio,
-                                         noise_scale,
-                                         noise_scale_w,
-                                         length_scale,
-                                         speaker,
-                                         style_text,
-                                         style_weight,
-                                         language,
-                                         mode,
-                                         sentence_mode,
-                                         skip_start,
-                                         skip_end
-                                     ],
-                                     outputs=[audio_output],
-                                 )
-         for band in SchoolLilst:
-             with gr.TabItem(band):
-                 for name in SchoolLilst[band]:
-                     with gr.TabItem(name):
-                         with gr.Row():
-                             with gr.Column():
-                                 with gr.Row():
-                                     gr.Markdown(
-                                         '<div align="center">'
-                                         f'<img style="width:auto;height:400px;" src="https://mahiruoshi-bangdream-bert-vits2.hf.space/file/image/{name}.png">'
-                                         '</div>'
                                    )
-                                 with gr.Accordion(label="参数设定", open=False):
                                    sdp_ratio = gr.Slider(
                                        minimum=0, maximum=1, value=0.5, step=0.01, label="SDP/DP混合比"
                                    )
                                    noise_scale = gr.Slider(
-                                         minimum=0.1, maximum=2, value=0.6, step=0.01, label="Noise:感情调节"
                                    )
                                    noise_scale_w = gr.Slider(
-                                         minimum=0.1, maximum=2, value=0.667, step=0.01, label="Noise_W:音素长度"
                                    )
-                                     skip_start = gr.Checkbox(label="skip_start")
-                                     skip_end = gr.Checkbox(label="skip_end")
                                    speaker = gr.Dropdown(
-                                         choices=[name], value=name, label="说话人"
-                                     )
-                                     length_scale = gr.Slider(
-                                         minimum=0.1, maximum=2, value=1, step=0.01, label="语速调节"
-                                     )
-                                     language = gr.Dropdown(
-                                         choices=languages, value="Auto", label="语言选择,若不选自动则会将输入语言翻译为日语或中文"
-                                     )
-                                     mode = gr.Dropdown(
-                                         choices=["pyopenjtalk-V2.3-Katakana"], value="pyopenjtalk-V2.3-Katakana", label="TTS模式,合成少歌角色需要切换成 pyopenjtalk-V2.3-Katakana "
-                                     )
-                                     sentence_mode = gr.Dropdown(
-                                         choices=sentence_modes, value="sentence", label="文本合成模式"
-                                     )
-                                     with gr.Accordion(label="扩展选项", open=False):
-                                         inputFile = gr.UploadButton(label="txt文件输入")
-                                         speakerList = gr.TextArea(
-                                             label="角色对应表,如果你记不住角色名可以这样,左边是你想要在每一句话合成中用到的speaker(见角色清单)右边是你上传文本时分隔符左边设置的说话人:{ChoseSpeakerFromConfigList}|{SeakerInUploadText}",
-                                             value = "ましろ|真白\n七深|七深\n透子|透子\nつくし|筑紫\n瑠唯|瑠唯\nそよ|素世\n祥子|祥子",
-                                         )
-                                         groupSize = gr.Slider(
-                                             minimum=10, maximum=1000 if torch.cuda.is_available() else 50,value = 50, step=1, label="单个音频文件包含的最大句子数"
-                                         )
-                                         filepath = gr.TextArea(
-                                             label="本地合成时的音频存储文件夹(会清空文件夹,别把C盘删了)",
-                                             value = "D:/audiobook/book1",
-                                         )
-                                         silenceTime = gr.Slider(
-                                             minimum=0, maximum=1, value=0.5, step=0.01, label="句子的间隔"
-                                         )
                                    modelstrs = gr.Dropdown(label = "模型", choices = modelPaths, value = modelPaths[0], type = "value")
                                    btnMod = gr.Button("载入模型")
                                    statusa = gr.TextArea(label = "模型加载状态")
                                    btnMod.click(loadmodel, inputs=[modelstrs], outputs = [statusa])
                                with gr.Column():
                                    text = gr.TextArea(
-                                         label="文本输入,可用'|'分割说话人和文本,注意换行",
-                                         info="输入纯日语或者中文",
-                                         placeholder=f"{name}|你觉得你是职业歌手吗\n真白|我觉得我是",
-                                         value=f"私は{name}です。 "
-                                     )
                                    style_text = gr.Textbox(
                                        label="情感辅助文本",
                                        info="语言保持跟主文本一致,文本可以参考训练集:https://huggingface.co/spaces/Mahiruoshi/BangDream-Bert-VITS2/blob/main/filelists/Mygo.list)",
@@ -1003,14 +426,10 @@
                                    btntran = gr.Button("快速中翻日")
                                    translateResult = gr.TextArea(label="使用百度翻译",placeholder="从这里复制翻译后的文本")
                                    btntran.click(translate, inputs=[text], outputs = [translateResult])
                                    btn.click(
-                                         generate_audio,
                                        inputs=[
-                                             inputFile,
-                                             groupSize,
-                                             filepath,
-                                             silenceTime,
-                                             speakerList,
                                            text,
                                            sdp_ratio,
                                            noise_scale,
@@ -1020,12 +439,75 @@
                                            style_text,
                                            style_weight,
                                            language,
-                                             mode,
-                                             sentence_mode,
-                                             skip_start,
-                                             skip_end
                                        ],
                                        outputs=[audio_output],
                                    )
    print("推理页面已开启!")
-     app.launch()
 
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
+ from tools.sentence import extrac, is_japanese, is_chinese, seconds_to_ass_time, extract_text_from_file, remove_annotations, extract_and_convert
+

import gradio as gr

from text.symbols import symbols
import sys
import re
+ from tools.translate import translate

net_g = None

    "AveMujica":["祥子","睦","海鈴","にゃむ","初華"],
}

def get_net_g(model_path: str, device: str, hps):
    net_g = SynthesizerTrn(
        len(symbols),

    language = torch.LongTensor(language)
    return bert, ja_bert, en_bert, phone, tone, language

+
def infer(
    text,
    sdp_ratio,

    style_text=None,
    style_weight=0.7,
    language = "Auto",
):
    if language == "Auto":
        language= 'JP' if is_japanese(text) else 'ZH'
    bert, ja_bert, en_bert, phones, tones, lang_ids = get_text(
        text,
        language,

        style_text=style_text,
        style_weight=style_weight,
    )
    with torch.no_grad():
        x_tst = phones.to(device).unsqueeze(0)
        tones = tones.to(device).unsqueeze(0)

    ) # , emo
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
+     return (hps.data.sampling_rate, gr.processing_utils.convert_to_16_bit_wav(audio))
+
+ def is_japanese(string):  # overrides the version imported from tools.sentence
+     for ch in string:
+         if ord(ch) > 0x3040 and ord(ch) < 0x30FF:
+             return True
+     return False

def loadmodel(model):
    _ = net_g.eval()
    _ = utils.load_checkpoint(model, net_g, None, skip_optimizer=True)
    return "success"

+ def generate_audio_and_srt_for_group(group, outputPath, group_index, sampling_rate, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale, speakerList, silenceTime):
    audio_fin = []
    ass_entries = []
    start_time = 0
    #speaker = random.choice(cara_list)
    ass_header = """[Script Info]
+ ; 我没意见
+ Title: Audiobook
+ ScriptType: v4.00+
+ WrapStyle: 0
+ PlayResX: 640
+ PlayResY: 360
+ ScaledBorderAndShadow: yes
+ [V4+ Styles]
+ Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
+ Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,1,1,2,10,10,10,1
+ [Events]
+ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
+ """

    for sentence in group:
        try:
+             FakeSpeaker = sentence.split("|")[0]
+             print(FakeSpeaker)
+             SpeakersList = re.split('\n', speakerList)
+             if FakeSpeaker in list(hps.data.spk2id.keys()):
+                 speaker = FakeSpeaker
+             for i in SpeakersList:
+                 if FakeSpeaker == i.split("|")[1]:
+                     speaker = i.split("|")[0]
+             if sentence != '\n':
+                 audio = infer_simple((remove_annotations(sentence.split("|")[-1]).replace(" ","")+"。").replace(",。","。").replace("。。","。"), sdp_ratio, noise_scale, noise_scale_w, length_scale, speaker)
+                 silence_frames = int(silenceTime * 44100)  # silence between sentences, at the 44100 Hz output rate
+                 silence_data = np.zeros((silence_frames,), dtype=audio.dtype)
+                 audio_fin.append(audio)
+                 audio_fin.append(silence_data)
+
+                 duration = len(audio) / sampling_rate
+                 print(duration)
+                 end_time = start_time + duration + silenceTime
+                 ass_entries.append("Dialogue: 0,{},{},".format(seconds_to_ass_time(start_time), seconds_to_ass_time(end_time)) + "Default,,0,0,0,,{}".format(sentence.replace("|",":")))
+                 start_time = end_time
        except:
            pass
    wav_filename = os.path.join(outputPath, f'audiobook_part_{group_index}.wav')
    ass_filename = os.path.join(outputPath, f'audiobook_part_{group_index}.ass')
+
+     write(wav_filename, sampling_rate, np.concatenate(audio_fin))

    with open(ass_filename, 'w', encoding='utf-8') as f:
        f.write(ass_header + '\n'.join(ass_entries))
+     return (hps.data.sampling_rate, np.concatenate(audio_fin))
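The `speaker|alias` table is re-scanned line by line for every sentence above; the intent is a lookup from the alias used in the uploaded text to a speaker the model knows. A dict-based sketch of the same mapping (illustrative only, not what the Space runs):

```python
def parse_speaker_table(table: str) -> dict:
    # "ましろ|真白\nつくし|筑紫" -> {"真白": "ましろ", "筑紫": "つくし"}
    mapping = {}
    for line in table.splitlines():
        if "|" in line:
            model_speaker, alias = line.split("|", 1)
            mapping[alias] = model_speaker
    return mapping

aliases = parse_speaker_table("ましろ|真白\nつくし|筑紫")
speaker = aliases.get("筑紫", "ましろ")  # fall back to a default voice
```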
+
+ def audiobook(inputFile, groupsize, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale, speakerList, silenceTime, filepath, raw_text):
+     directory_path = filepath if torch.cuda.is_available() else "books"
+
+     if os.path.exists(directory_path):
+         shutil.rmtree(directory_path)
+
+     os.makedirs(directory_path)
+     if inputFile:
+         text = extract_text_from_file(inputFile.name)
+     else:
+         text = raw_text
+     sentences = extrac(extract_and_convert(text))
+     GROUP_SIZE = groupsize
+     for i in range(0, len(sentences), GROUP_SIZE):
+         group = sentences[i:i+GROUP_SIZE]
+         if speakerList == "":
+             speakerList = "无"
+         result = generate_audio_and_srt_for_group(group, directory_path, i//GROUP_SIZE + 1, 44100, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale, speakerList, silenceTime)
+         if not torch.cuda.is_available():
+             return result
+     return result

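audiobook() strides through the sentence list GROUP_SIZE at a time and writes one wav/ass pair per group; the grouping itself is plain slicing:

```python
sentences = [f"第{i}句。" for i in range(1, 8)]
GROUP_SIZE = 3
for i in range(0, len(sentences), GROUP_SIZE):
    group = sentences[i:i + GROUP_SIZE]
    print(f"audiobook_part_{i // GROUP_SIZE + 1}.wav", group)
# 7 sentences with GROUP_SIZE=3 -> parts of 3, 3 and 1 sentences
```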
+ def infer_simple(
    text,
    sdp_ratio,
    noise_scale,

    sid,
    style_text=None,
    style_weight=0.7,
):
+     if is_chinese(text) or is_japanese(text):
+         if len(text) > 1:
+             language= 'JP' if is_japanese(text) else 'ZH'
+             bert, ja_bert, en_bert, phones, tones, lang_ids = get_text(
+                 text,
+                 language,
+                 hps,
+                 device,
+                 style_text="",
+                 style_weight=0,
+             )
+             with torch.no_grad():
+                 x_tst = phones.to(device).unsqueeze(0)
+                 tones = tones.to(device).unsqueeze(0)
+                 lang_ids = lang_ids.to(device).unsqueeze(0)
+                 bert = bert.to(device).unsqueeze(0)
+                 ja_bert = ja_bert.to(device).unsqueeze(0)
+                 en_bert = en_bert.to(device).unsqueeze(0)
+                 x_tst_lengths = torch.LongTensor([phones.size(0)]).to(device)
+                 # emo = emo.to(device).unsqueeze(0)
+                 del phones
+                 speakers = torch.LongTensor([hps.data.spk2id[sid]]).to(device)
+                 audio = (
+                     net_g.infer(
+                         x_tst,
+                         x_tst_lengths,
+                         speakers,
+                         tones,
+                         lang_ids,
+                         bert,
+                         ja_bert,
+                         en_bert,
+                         sdp_ratio=sdp_ratio,
+                         noise_scale=noise_scale,
+                         noise_scale_w=noise_scale_w,
+                         length_scale=length_scale,
+                     )[0][0, 0]
+                     .data.cpu()
+                     .float()
+                     .numpy()
+                 )
+                 del (
+                     x_tst,
+                     tones,
+                     lang_ids,
+                     bert,
+                     x_tst_lengths,
+                     speakers,
+                     ja_bert,
+                     en_bert,
+                 )  # , emo
+                 if torch.cuda.is_available():
+                     torch.cuda.empty_cache()
+                 return audio

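A minimal way to drive the new infer_simple directly, assuming a checkpoint has been loaded via loadmodel() and the speaker exists in hps.data.spk2id. The positional order follows the call in generate_audio_and_srt_for_group; the middle parameter names are hidden in this diff, so keywords are avoided, and write is taken to be scipy.io.wavfile.write as used above:

```python
# text, sdp_ratio, noise_scale, noise_scale_w, length_scale, speaker id
audio = infer_simple("私はましろです。", 0.2, 0.6, 0.667, 1.0, "ましろ")
if audio is not None:  # infer_simple returns None for non-CJK or single-character input
    write("demo.wav", 44100, audio)
```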
if __name__ == "__main__":
+     languages = [ "Auto", "ZH", "JP"]
+     modelPaths = []
    for dirpath, dirnames, filenames in os.walk('Data/BangDream/models/'):
        for filename in filenames:
            modelPaths.append(os.path.join(dirpath, filename))

    with gr.Blocks() as app:
        gr.Markdown(value="""
([Bert-Vits2](https://github.com/Stardust-minus/Bert-VITS2) V2.3)少歌邦邦全员在线语音合成\n
+ 镜像 [V2.2](https://huggingface.co/spaces/Mahiruoshi/MyGO_VIts-bert)\n
[好玩的](http://love.soyorin.top/)\n
该界面的真实链接(国内可用): https://mahiruoshi-bangdream-bert-vits2.hf.space/\n
API: https://mahiruoshi-bert-vits2-api.hf.space/ \n

                                        f'<img style="width:auto;height:400px;" src="https://mahiruoshi-bangdream-bert-vits2.hf.space/file/image/{name}.png">'
                                        '</div>'
                                    )
                                length_scale = gr.Slider(
                                    minimum=0.1, maximum=2, value=1, step=0.01, label="语速调节"
                                )
                                language = gr.Dropdown(
+                                     choices=languages, value="Auto", label="语言"
                                )
+                                 with gr.Accordion(label="参数设定", open=True):
                                    sdp_ratio = gr.Slider(
                                        minimum=0, maximum=1, value=0.5, step=0.01, label="SDP/DP混合比"
                                    )
                                    noise_scale = gr.Slider(
+                                         minimum=0.1, maximum=2, value=0.6, step=0.01, label="感情调节"
                                    )
                                    noise_scale_w = gr.Slider(
+                                         minimum=0.1, maximum=2, value=0.667, step=0.01, label="音素长度"
                                    )
                                    speaker = gr.Dropdown(
+                                         choices=speakers, value=name, label="说话人"
+                                     )
+                                 with gr.Accordion(label="切换模型", open=False):
                                    modelstrs = gr.Dropdown(label = "模型", choices = modelPaths, value = modelPaths[0], type = "value")
                                    btnMod = gr.Button("载入模型")
                                    statusa = gr.TextArea(label = "模型加载状态")
                                    btnMod.click(loadmodel, inputs=[modelstrs], outputs = [statusa])
                                with gr.Column():
                                    text = gr.TextArea(
+                                         label="文本输入",
+                                         info="输入纯日语或者中文",
+                                         value="我是来结束这个乐队的。",
+                                     )
                                    style_text = gr.Textbox(
                                        label="情感辅助文本",
                                        info="语言保持跟主文本一致,文本可以参考训练集:https://huggingface.co/spaces/Mahiruoshi/BangDream-Bert-VITS2/blob/main/filelists/Mygo.list)",

                                    btntran = gr.Button("快速中翻日")
                                    translateResult = gr.TextArea(label="使用百度翻译",placeholder="从这里复制翻译后的文本")
                                    btntran.click(translate, inputs=[text], outputs = [translateResult])
+
                                    btn.click(
+                                         infer,
                                        inputs=[
                                            text,
                                            sdp_ratio,
                                            noise_scale,

                                            style_text,
                                            style_weight,
                                            language,
                                        ],
                                        outputs=[audio_output],
                                    )
+         with gr.TabItem('少歌在2.2版本'):
+             gr.Markdown(value="""
+             <div align="center">
+             <iframe style="width:100%;height:400px;" src="https://mahiruoshi-mygo-vits-bert.hf.space/" frameborder="0"></iframe>
+             </div>"""
+             )
+         with gr.Tab('拓展功能'):
+             with gr.Row():
+                 with gr.Column():
+                     gr.Markdown(
+                         f"从 <a href='https://nijigaku.top/2023/10/03/BangDreamTTS/'>我的博客站点</a> 查看自制galgame使用说明"
+                     )
+                     inputFile = gr.UploadButton(label="txt文件输入")
+                     raw_text = gr.TextArea(
+                         label="文本输入",
+                         info="输入纯日语或者中文",
+                         value="つくし|我是来结束这个乐队的。",
+                     )
+                     groupSize = gr.Slider(
+                         minimum=10, maximum=1000 if torch.cuda.is_available() else 50, value = 50, step=1, label="单个音频文件包含的最大字数"
+                     )
+                     silenceTime = gr.Slider(
+                         minimum=0, maximum=1, value=0.5, step=0.01, label="句子的间隔"
+                     )
+                     filepath = gr.TextArea(
+                         label="本地合成时的音频存储文件夹(会清空文件夹)",
+                         value = "D:/audiobook/book1",
+                     )
+                     speakerList = gr.TextArea(
+                         label="角色对应表,左边是你想要在每一句话合成中用到的speaker(见角色清单)右边是你上传文本时分隔符左边设置的说话人:{ChoseSpeakerFromConfigList}|{SpeakerInUploadText}",
+                         placeholder = "ましろ|真白\n七深|七深\n透子|透子\nつくし|筑紫\n瑠唯|瑠唯\nそよ|素世\n祥子|祥子",
+                     )
+                     speaker = gr.Dropdown(
+                         choices=speakers, value = "ましろ", label="选择默认说话人"
+                     )
+                 with gr.Column():
+                     sdp_ratio = gr.Slider(
+                         minimum=0, maximum=1, value=0.2, step=0.01, label="SDP/DP混合比"
+                     )
+                     noise_scale = gr.Slider(
+                         minimum=0.1, maximum=2, value=0.6, step=0.01, label="感情调节"
+                     )
+                     noise_scale_w = gr.Slider(
+                         minimum=0.1, maximum=2, value=0.667, step=0.01, label="音素长度"
+                     )
+                     length_scale = gr.Slider(
+                         minimum=0.1, maximum=2, value=1, step=0.01, label="生成长度"
+                     )
+                     LastAudioOutput = gr.Audio(label="当使用cuda时才能在本地文件夹浏览全部文件")
+                     btn2 = gr.Button("点击生成", variant="primary")
+                     btn2.click(
+                         audiobook,
+                         inputs=[
+                             inputFile,
+                             groupSize,
+                             speaker,
+                             sdp_ratio,
+                             noise_scale,
+                             noise_scale_w,
+                             length_scale,
+                             speakerList,
+                             silenceTime,
+                             filepath,
+                             raw_text
+                         ],
+                         outputs=[LastAudioOutput],
+                     )
    print("推理页面已开启!")
+     app.launch(share=True)