Fix: encoder and decoder in tokenizer
#13 by JingweiZuo - opened
tokenization_rwkv_world.py
CHANGED
@@ -106,11 +106,11 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
|
|
106 |
assert isinstance(x, bytes)
|
107 |
assert len(x) == int(l[l.rindex(" ") :])
|
108 |
sorted += [x]
|
109 |
-
self.encoder[
|
110 |
|
111 |
self.decoder = {}
|
112 |
for k, v in self.encoder.items():
|
113 |
-
self.decoder[v] =
|
114 |
|
115 |
self.trie = TRIE()
|
116 |
for t, i in self.decoder.items():
|
|
|
106 |
assert isinstance(x, bytes)
|
107 |
assert len(x) == int(l[l.rindex(" ") :])
|
108 |
sorted += [x]
|
109 |
+
self.encoder[x] = idx
|
110 |
|
111 |
self.decoder = {}
|
112 |
for k, v in self.encoder.items():
|
113 |
+
self.decoder[v] = k
|
114 |
|
115 |
self.trie = TRIE()
|
116 |
for t, i in self.decoder.items():
|