Spaces:
Runtime error
Runtime error
Upload 5 files
Browse files- bert/ProsodyModel.py +75 -0
- bert/__init__.py +1 -0
- bert/config.json +19 -0
- bert/prosody_tool.py +426 -0
- bert/vocab.txt +0 -0
bert/ProsodyModel.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import torch
|
3 |
+
import torch.nn as nn
|
4 |
+
import torch.nn.functional as F
|
5 |
+
|
6 |
+
from transformers import BertModel, BertConfig, BertTokenizer
|
7 |
+
|
8 |
+
|
9 |
+
class CharEmbedding(nn.Module):
    """BERT encoder followed by a linear projection to 256-dim token vectors.

    The tokenizer and the BERT configuration are read from ``model_dir``.
    Note that ``BertModel(config)`` builds a randomly initialised network;
    no pretrained BERT weights are loaded here, so the caller is expected
    to load a state dict before the outputs are meaningful.
    """

    def __init__(self, model_dir):
        """Create the tokenizer, the encoder and the projection layers.

        Args:
            model_dir: Location passed to ``BertTokenizer.from_pretrained``
                and ``BertConfig.from_pretrained``.
        """
        super().__init__()
        self.tokenizer = BertTokenizer.from_pretrained(model_dir)
        self.bert_config = BertConfig.from_pretrained(model_dir)
        self.hidden_size = self.bert_config.hidden_size
        self.bert = BertModel(self.bert_config)
        self.proj = nn.Linear(self.hidden_size, 256)
        # Not used by forward(). Kept so the module keeps the same parameter
        # names -- presumably a 3-way head used during training (TODO confirm).
        self.linear = nn.Linear(256, 3)

    def text2Token(self, text):
        """Tokenize ``text`` and return the list of vocabulary ids.

        Uses ``tokenize`` + ``convert_tokens_to_ids`` directly, so the ids
        are exactly those of the produced tokens, in order.
        """
        token = self.tokenizer.tokenize(text)
        txtid = self.tokenizer.convert_tokens_to_ids(token)
        return txtid

    def forward(self, inputs_ids, inputs_masks, tokens_type_ids):
        """Encode a batch and project every position to 256 dimensions.

        Args:
            inputs_ids: Token ids, forwarded to BERT as ``input_ids``.
            inputs_masks: Forwarded to BERT as ``attention_mask``.
            tokens_type_ids: Forwarded to BERT as ``token_type_ids``.

        Returns:
            ``self.proj`` applied to the first element of the BERT output
            (presumably shaped (batch, seq_len, 256) -- TODO confirm).
        """
        out_seq = self.bert(input_ids=inputs_ids,
                            attention_mask=inputs_masks,
                            token_type_ids=tokens_type_ids)[0]
        out_seq = self.proj(out_seq)
        return out_seq
|
30 |
+
|
31 |
+
|
32 |
+
class TTSProsody(object):
    """Inference-only wrapper that turns text into per-token embeddings."""

    def __init__(self, path, device):
        """Build the model, load ``prosody_model.pt`` and move it to ``device``.

        Args:
            path: Directory holding the tokenizer/config files and the
                ``prosody_model.pt`` checkpoint.
            device: Device the model and the input tensors are moved to.
        """
        self.device = device
        self.char_model = CharEmbedding(path)
        # NOTE(security): depending on the torch version and its
        # ``weights_only`` default, torch.load may unpickle arbitrary
        # objects -- only load checkpoints from a trusted source.
        state_dict = torch.load(
            os.path.join(path, 'prosody_model.pt'),
            map_location="cpu"
        )
        # strict=False: keys missing from / unexpected in the checkpoint are
        # ignored instead of raising.
        self.char_model.load_state_dict(state_dict, strict=False)
        self.char_model.eval()
        self.char_model.to(self.device)

    def get_char_embeds(self, text):
        """Return the model output for ``text`` as a CPU tensor.

        The text is tokenized by the model's tokenizer and fed as a batch of
        one with an all-ones attention mask and all-zero token type ids; the
        batch dimension is squeezed away again before returning.
        """
        token_ids = self.char_model.text2Token(text)
        n_tokens = len(token_ids)
        input_ids = torch.LongTensor([token_ids]).to(self.device)
        input_masks = torch.LongTensor([[1] * n_tokens]).to(self.device)
        type_ids = torch.LongTensor([[0] * n_tokens]).to(self.device)

        with torch.no_grad():
            char_embeds = self.char_model(
                input_ids, input_masks, type_ids).squeeze(0).cpu()
        return char_embeds

    def expand_for_phone(self, char_embeds, length):  # length of phones for char
        """Repeat each row of ``char_embeds`` as many times as ``length`` says.

        Args:
            char_embeds: Tensor whose first dimension indexes characters.
            length: Sequence with one repeat count per row of ``char_embeds``
                (the number of phones of that character).

        Returns:
            A numpy array with ``sum(length)`` rows; row ``i`` of the input
            appears ``length[i]`` consecutive times.

        Raises:
            AssertionError: If ``length`` does not have one entry per row.
        """
        assert char_embeds.size(0) == len(length), \
            "char_embeds and length must have the same number of entries"
        if len(length) == 0:
            # torch.cat refuses an empty list; nothing to expand, so the
            # (already empty) input is the correct result.
            return char_embeds.numpy()
        expand_vecs = [vec.expand(leng, -1)
                       for vec, leng in zip(char_embeds, length)]
        expand_embeds = torch.cat(expand_vecs, 0)
        assert expand_embeds.size(0) == sum(length), \
            "expanded row count does not match sum(length)"
        return expand_embeds.numpy()
|
68 |
+
|
69 |
+
|
70 |
+
if __name__ == "__main__":
    # Interactive smoke test: load the model from ./bert/ and run every line
    # typed by the user through get_char_embeds (the result is discarded).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    prosody = TTSProsody('./bert/', device)
    while True:
        try:
            text = input("请输入文本:")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C ends the session instead of dumping a traceback.
            break
        prosody.get_char_embeds(text)
|
bert/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from .ProsodyModel import TTSProsody
|
bert/config.json
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"attention_probs_dropout_prob": 0.1,
|
3 |
+
"directionality": "bidi",
|
4 |
+
"hidden_act": "gelu",
|
5 |
+
"hidden_dropout_prob": 0.1,
|
6 |
+
"hidden_size": 768,
|
7 |
+
"initializer_range": 0.02,
|
8 |
+
"intermediate_size": 3072,
|
9 |
+
"max_position_embeddings": 512,
|
10 |
+
"num_attention_heads": 12,
|
11 |
+
"num_hidden_layers": 12,
|
12 |
+
"pooler_fc_size": 768,
|
13 |
+
"pooler_num_attention_heads": 12,
|
14 |
+
"pooler_num_fc_layers": 3,
|
15 |
+
"pooler_size_per_head": 128,
|
16 |
+
"pooler_type": "first_token_transform",
|
17 |
+
"type_vocab_size": 2,
|
18 |
+
"vocab_size": 21128
|
19 |
+
}
|
bert/prosody_tool.py
ADDED
@@ -0,0 +1,426 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def is_chinese(uchar):
    """Return True if ``uchar`` lies in the range U+4E00..U+9FA5.

    For a single character this is a plain range test. For longer strings
    every character must be in range; comparing the whole string at once
    would be lexicographic and effectively look at the first character
    only. The empty string is not Chinese.

    Args:
        uchar: The character (or string) to classify.

    Returns:
        bool: True when the argument is non-empty and all of its characters
        are within U+4E00..U+9FA5, otherwise False.
    """
    return len(uchar) > 0 and all(
        u'\u4e00' <= ch <= u'\u9fa5' for ch in uchar)
|
6 |
+
|
7 |
+
|
8 |
+
# Lookup table from a toneless pinyin syllable to an (initial, final) pair.
#
# Conventions visible in the table:
#   * "^" is the initial of syllables that have no initial consonant
#     ("a", "e", "o", ...) and of the y-/w- spellings, whose finals are
#     rewritten to start with i/u/v ("ya" -> "ia", "wa" -> "ua", "yu" -> "v").
#   * "v" stands for the u-umlaut vowel ("ju" -> "v", "lv" -> "v",
#     "xuan" -> "van"); both the "-ve" and "-ue" spellings of l/n map to "ve".
#   * The "i" of zi/ci/si is the final "ii"; that of zhi/chi/shi/ri is "iii".
#   * Abbreviated finals are expanded: "ui" -> "uei", "iu" -> "iou",
#     "un" -> "uen" (except after j/q/x/y, where it is "vn").
#   * "ng" shares the entry of "en"; "yo" shares the final of "you".
pinyin_dict = {
    "a": ("^", "a"),
    "ai": ("^", "ai"),
    "an": ("^", "an"),
    "ang": ("^", "ang"),
    "ao": ("^", "ao"),
    "ba": ("b", "a"),
    "bai": ("b", "ai"),
    "ban": ("b", "an"),
    "bang": ("b", "ang"),
    "bao": ("b", "ao"),
    "be": ("b", "e"),
    "bei": ("b", "ei"),
    "ben": ("b", "en"),
    "beng": ("b", "eng"),
    "bi": ("b", "i"),
    "bian": ("b", "ian"),
    "biao": ("b", "iao"),
    "bie": ("b", "ie"),
    "bin": ("b", "in"),
    "bing": ("b", "ing"),
    "bo": ("b", "o"),
    "bu": ("b", "u"),
    "ca": ("c", "a"),
    "cai": ("c", "ai"),
    "can": ("c", "an"),
    "cang": ("c", "ang"),
    "cao": ("c", "ao"),
    "ce": ("c", "e"),
    "cen": ("c", "en"),
    "ceng": ("c", "eng"),
    "cha": ("ch", "a"),
    "chai": ("ch", "ai"),
    "chan": ("ch", "an"),
    "chang": ("ch", "ang"),
    "chao": ("ch", "ao"),
    "che": ("ch", "e"),
    "chen": ("ch", "en"),
    "cheng": ("ch", "eng"),
    "chi": ("ch", "iii"),
    "chong": ("ch", "ong"),
    "chou": ("ch", "ou"),
    "chu": ("ch", "u"),
    "chua": ("ch", "ua"),
    "chuai": ("ch", "uai"),
    "chuan": ("ch", "uan"),
    "chuang": ("ch", "uang"),
    "chui": ("ch", "uei"),
    "chun": ("ch", "uen"),
    "chuo": ("ch", "uo"),
    "ci": ("c", "ii"),
    "cong": ("c", "ong"),
    "cou": ("c", "ou"),
    "cu": ("c", "u"),
    "cuan": ("c", "uan"),
    "cui": ("c", "uei"),
    "cun": ("c", "uen"),
    "cuo": ("c", "uo"),
    "da": ("d", "a"),
    "dai": ("d", "ai"),
    "dan": ("d", "an"),
    "dang": ("d", "ang"),
    "dao": ("d", "ao"),
    "de": ("d", "e"),
    "dei": ("d", "ei"),
    "den": ("d", "en"),
    "deng": ("d", "eng"),
    "di": ("d", "i"),
    "dia": ("d", "ia"),
    "dian": ("d", "ian"),
    "diao": ("d", "iao"),
    "die": ("d", "ie"),
    "ding": ("d", "ing"),
    "diu": ("d", "iou"),
    "dong": ("d", "ong"),
    "dou": ("d", "ou"),
    "du": ("d", "u"),
    "duan": ("d", "uan"),
    "dui": ("d", "uei"),
    "dun": ("d", "uen"),
    "duo": ("d", "uo"),
    "e": ("^", "e"),
    "ei": ("^", "ei"),
    "en": ("^", "en"),
    "ng": ("^", "en"),
    "eng": ("^", "eng"),
    "er": ("^", "er"),
    "fa": ("f", "a"),
    "fan": ("f", "an"),
    "fang": ("f", "ang"),
    "fei": ("f", "ei"),
    "fen": ("f", "en"),
    "feng": ("f", "eng"),
    "fo": ("f", "o"),
    "fou": ("f", "ou"),
    "fu": ("f", "u"),
    "ga": ("g", "a"),
    "gai": ("g", "ai"),
    "gan": ("g", "an"),
    "gang": ("g", "ang"),
    "gao": ("g", "ao"),
    "ge": ("g", "e"),
    "gei": ("g", "ei"),
    "gen": ("g", "en"),
    "geng": ("g", "eng"),
    "gong": ("g", "ong"),
    "gou": ("g", "ou"),
    "gu": ("g", "u"),
    "gua": ("g", "ua"),
    "guai": ("g", "uai"),
    "guan": ("g", "uan"),
    "guang": ("g", "uang"),
    "gui": ("g", "uei"),
    "gun": ("g", "uen"),
    "guo": ("g", "uo"),
    "ha": ("h", "a"),
    "hai": ("h", "ai"),
    "han": ("h", "an"),
    "hang": ("h", "ang"),
    "hao": ("h", "ao"),
    "he": ("h", "e"),
    "hei": ("h", "ei"),
    "hen": ("h", "en"),
    "heng": ("h", "eng"),
    "hong": ("h", "ong"),
    "hou": ("h", "ou"),
    "hu": ("h", "u"),
    "hua": ("h", "ua"),
    "huai": ("h", "uai"),
    "huan": ("h", "uan"),
    "huang": ("h", "uang"),
    "hui": ("h", "uei"),
    "hun": ("h", "uen"),
    "huo": ("h", "uo"),
    "ji": ("j", "i"),
    "jia": ("j", "ia"),
    "jian": ("j", "ian"),
    "jiang": ("j", "iang"),
    "jiao": ("j", "iao"),
    "jie": ("j", "ie"),
    "jin": ("j", "in"),
    "jing": ("j", "ing"),
    "jiong": ("j", "iong"),
    "jiu": ("j", "iou"),
    "ju": ("j", "v"),
    "juan": ("j", "van"),
    "jue": ("j", "ve"),
    "jun": ("j", "vn"),
    "ka": ("k", "a"),
    "kai": ("k", "ai"),
    "kan": ("k", "an"),
    "kang": ("k", "ang"),
    "kao": ("k", "ao"),
    "ke": ("k", "e"),
    "kei": ("k", "ei"),
    "ken": ("k", "en"),
    "keng": ("k", "eng"),
    "kong": ("k", "ong"),
    "kou": ("k", "ou"),
    "ku": ("k", "u"),
    "kua": ("k", "ua"),
    "kuai": ("k", "uai"),
    "kuan": ("k", "uan"),
    "kuang": ("k", "uang"),
    "kui": ("k", "uei"),
    "kun": ("k", "uen"),
    "kuo": ("k", "uo"),
    "la": ("l", "a"),
    "lai": ("l", "ai"),
    "lan": ("l", "an"),
    "lang": ("l", "ang"),
    "lao": ("l", "ao"),
    "le": ("l", "e"),
    "lei": ("l", "ei"),
    "leng": ("l", "eng"),
    "li": ("l", "i"),
    "lia": ("l", "ia"),
    "lian": ("l", "ian"),
    "liang": ("l", "iang"),
    "liao": ("l", "iao"),
    "lie": ("l", "ie"),
    "lin": ("l", "in"),
    "ling": ("l", "ing"),
    "liu": ("l", "iou"),
    "lo": ("l", "o"),
    "long": ("l", "ong"),
    "lou": ("l", "ou"),
    "lu": ("l", "u"),
    "lv": ("l", "v"),
    "luan": ("l", "uan"),
    "lve": ("l", "ve"),
    "lue": ("l", "ve"),
    "lun": ("l", "uen"),
    "luo": ("l", "uo"),
    "ma": ("m", "a"),
    "mai": ("m", "ai"),
    "man": ("m", "an"),
    "mang": ("m", "ang"),
    "mao": ("m", "ao"),
    "me": ("m", "e"),
    "mei": ("m", "ei"),
    "men": ("m", "en"),
    "meng": ("m", "eng"),
    "mi": ("m", "i"),
    "mian": ("m", "ian"),
    "miao": ("m", "iao"),
    "mie": ("m", "ie"),
    "min": ("m", "in"),
    "ming": ("m", "ing"),
    "miu": ("m", "iou"),
    "mo": ("m", "o"),
    "mou": ("m", "ou"),
    "mu": ("m", "u"),
    "na": ("n", "a"),
    "nai": ("n", "ai"),
    "nan": ("n", "an"),
    "nang": ("n", "ang"),
    "nao": ("n", "ao"),
    "ne": ("n", "e"),
    "nei": ("n", "ei"),
    "nen": ("n", "en"),
    "neng": ("n", "eng"),
    "ni": ("n", "i"),
    "nia": ("n", "ia"),
    "nian": ("n", "ian"),
    "niang": ("n", "iang"),
    "niao": ("n", "iao"),
    "nie": ("n", "ie"),
    "nin": ("n", "in"),
    "ning": ("n", "ing"),
    "niu": ("n", "iou"),
    "nong": ("n", "ong"),
    "nou": ("n", "ou"),
    "nu": ("n", "u"),
    "nv": ("n", "v"),
    "nuan": ("n", "uan"),
    "nve": ("n", "ve"),
    "nue": ("n", "ve"),
    "nuo": ("n", "uo"),
    "o": ("^", "o"),
    "ou": ("^", "ou"),
    "pa": ("p", "a"),
    "pai": ("p", "ai"),
    "pan": ("p", "an"),
    "pang": ("p", "ang"),
    "pao": ("p", "ao"),
    "pe": ("p", "e"),
    "pei": ("p", "ei"),
    "pen": ("p", "en"),
    "peng": ("p", "eng"),
    "pi": ("p", "i"),
    "pian": ("p", "ian"),
    "piao": ("p", "iao"),
    "pie": ("p", "ie"),
    "pin": ("p", "in"),
    "ping": ("p", "ing"),
    "po": ("p", "o"),
    "pou": ("p", "ou"),
    "pu": ("p", "u"),
    "qi": ("q", "i"),
    "qia": ("q", "ia"),
    "qian": ("q", "ian"),
    "qiang": ("q", "iang"),
    "qiao": ("q", "iao"),
    "qie": ("q", "ie"),
    "qin": ("q", "in"),
    "qing": ("q", "ing"),
    "qiong": ("q", "iong"),
    "qiu": ("q", "iou"),
    "qu": ("q", "v"),
    "quan": ("q", "van"),
    "que": ("q", "ve"),
    "qun": ("q", "vn"),
    "ran": ("r", "an"),
    "rang": ("r", "ang"),
    "rao": ("r", "ao"),
    "re": ("r", "e"),
    "ren": ("r", "en"),
    "reng": ("r", "eng"),
    "ri": ("r", "iii"),
    "rong": ("r", "ong"),
    "rou": ("r", "ou"),
    "ru": ("r", "u"),
    "rua": ("r", "ua"),
    "ruan": ("r", "uan"),
    "rui": ("r", "uei"),
    "run": ("r", "uen"),
    "ruo": ("r", "uo"),
    "sa": ("s", "a"),
    "sai": ("s", "ai"),
    "san": ("s", "an"),
    "sang": ("s", "ang"),
    "sao": ("s", "ao"),
    "se": ("s", "e"),
    "sen": ("s", "en"),
    "seng": ("s", "eng"),
    "sha": ("sh", "a"),
    "shai": ("sh", "ai"),
    "shan": ("sh", "an"),
    "shang": ("sh", "ang"),
    "shao": ("sh", "ao"),
    "she": ("sh", "e"),
    "shei": ("sh", "ei"),
    "shen": ("sh", "en"),
    "sheng": ("sh", "eng"),
    "shi": ("sh", "iii"),
    "shou": ("sh", "ou"),
    "shu": ("sh", "u"),
    "shua": ("sh", "ua"),
    "shuai": ("sh", "uai"),
    "shuan": ("sh", "uan"),
    "shuang": ("sh", "uang"),
    "shui": ("sh", "uei"),
    "shun": ("sh", "uen"),
    "shuo": ("sh", "uo"),
    "si": ("s", "ii"),
    "song": ("s", "ong"),
    "sou": ("s", "ou"),
    "su": ("s", "u"),
    "suan": ("s", "uan"),
    "sui": ("s", "uei"),
    "sun": ("s", "uen"),
    "suo": ("s", "uo"),
    "ta": ("t", "a"),
    "tai": ("t", "ai"),
    "tan": ("t", "an"),
    "tang": ("t", "ang"),
    "tao": ("t", "ao"),
    "te": ("t", "e"),
    "tei": ("t", "ei"),
    "teng": ("t", "eng"),
    "ti": ("t", "i"),
    "tian": ("t", "ian"),
    "tiao": ("t", "iao"),
    "tie": ("t", "ie"),
    "ting": ("t", "ing"),
    "tong": ("t", "ong"),
    "tou": ("t", "ou"),
    "tu": ("t", "u"),
    "tuan": ("t", "uan"),
    "tui": ("t", "uei"),
    "tun": ("t", "uen"),
    "tuo": ("t", "uo"),
    "wa": ("^", "ua"),
    "wai": ("^", "uai"),
    "wan": ("^", "uan"),
    "wang": ("^", "uang"),
    "wei": ("^", "uei"),
    "wen": ("^", "uen"),
    "weng": ("^", "ueng"),
    "wo": ("^", "uo"),
    "wu": ("^", "u"),
    "xi": ("x", "i"),
    "xia": ("x", "ia"),
    "xian": ("x", "ian"),
    "xiang": ("x", "iang"),
    "xiao": ("x", "iao"),
    "xie": ("x", "ie"),
    "xin": ("x", "in"),
    "xing": ("x", "ing"),
    "xiong": ("x", "iong"),
    "xiu": ("x", "iou"),
    "xu": ("x", "v"),
    "xuan": ("x", "van"),
    "xue": ("x", "ve"),
    "xun": ("x", "vn"),
    "ya": ("^", "ia"),
    "yan": ("^", "ian"),
    "yang": ("^", "iang"),
    "yao": ("^", "iao"),
    "ye": ("^", "ie"),
    "yi": ("^", "i"),
    "yin": ("^", "in"),
    "ying": ("^", "ing"),
    "yo": ("^", "iou"),
    "yong": ("^", "iong"),
    "you": ("^", "iou"),
    "yu": ("^", "v"),
    "yuan": ("^", "van"),
    "yue": ("^", "ve"),
    "yun": ("^", "vn"),
    "za": ("z", "a"),
    "zai": ("z", "ai"),
    "zan": ("z", "an"),
    "zang": ("z", "ang"),
    "zao": ("z", "ao"),
    "ze": ("z", "e"),
    "zei": ("z", "ei"),
    "zen": ("z", "en"),
    "zeng": ("z", "eng"),
    "zha": ("zh", "a"),
    "zhai": ("zh", "ai"),
    "zhan": ("zh", "an"),
    "zhang": ("zh", "ang"),
    "zhao": ("zh", "ao"),
    "zhe": ("zh", "e"),
    "zhei": ("zh", "ei"),
    "zhen": ("zh", "en"),
    "zheng": ("zh", "eng"),
    "zhi": ("zh", "iii"),
    "zhong": ("zh", "ong"),
    "zhou": ("zh", "ou"),
    "zhu": ("zh", "u"),
    "zhua": ("zh", "ua"),
    "zhuai": ("zh", "uai"),
    "zhuan": ("zh", "uan"),
    "zhuang": ("zh", "uang"),
    "zhui": ("zh", "uei"),
    "zhun": ("zh", "uen"),
    "zhuo": ("zh", "uo"),
    "zi": ("z", "ii"),
    "zong": ("z", "ong"),
    "zou": ("z", "ou"),
    "zu": ("z", "u"),
    "zuan": ("z", "uan"),
    "zui": ("z", "uei"),
    "zun": ("z", "uen"),
    "zuo": ("z", "uo"),
}
|
bert/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|