Update tokenizer.py
Browse files
- tokenizer.py +1 -0
tokenizer.py
CHANGED
@@ -57,6 +57,7 @@ class ChatGLMTokenizer:
|
|
57 |
prefix_mask += [1, 0]
|
58 |
|
59 |
if text_pair is not None:
|
|
|
60 |
pair_tokens = self.text_tokenizer.encode(text_pair)
|
61 |
tokens += pair_tokens
|
62 |
prefix_mask += [0] * len(pair_tokens)
|
|
|
57 |
prefix_mask += [1, 0]
|
58 |
|
59 |
if text_pair is not None:
|
60 |
+ text_pair = self.preprocess(text_pair, linebreak, whitespaces)
|
61 |
pair_tokens = self.text_tokenizer.encode(text_pair)
|
62 |
tokens += pair_tokens
|
63 |
prefix_mask += [0] * len(pair_tokens)
|