分词器(tokenizer)错误:无法识别 eos 标记(token)
#72
by
zheng-nlper
- opened
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
print(tokenizer._tokenize("你好"))
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
print(tokenizer._tokenize("你好"))