Update tokenization_kosmos2_fast.py
tokenization_kosmos2_fast.py (CHANGED)
@@ -137,7 +137,6 @@ class Kosmos2TokenizerFast(PreTrainedTokenizerFast):
         )
 
         self.vocab_file = vocab_file
-        self.can_save_slow_tokenizer = False if not self.vocab_file else True
 
         self.eod_token = "</doc>"
 
@@ -179,6 +178,10 @@ class Kosmos2TokenizerFast(PreTrainedTokenizerFast):
             # we need to set `special_tokens=False` to be the same as in the slow tokenizer.
             self.add_tokens(AddedToken(token, lstrip=True, rstrip=False), special_tokens=False)
 
+    @property
+    def can_save_slow_tokenizer(self) -> bool:
+        return os.path.isfile(self.vocab_file) if self.vocab_file else False
+
     def build_inputs_with_special_tokens(
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
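The gist of the change: `can_save_slow_tokenizer` goes from an attribute computed once in `__init__` (truthy whenever a `vocab_file` path was passed, even if no such file exists) to a property that re-checks, on every access, that the file is actually present on disk. A minimal sketch of that behavioral difference, using a hypothetical toy class rather than the real `Kosmos2TokenizerFast`:

```python
import os
import tempfile

# Toy stand-in (assumption: not the real Kosmos2TokenizerFast) showing why
# a property beats an attribute computed once in __init__.
class ToyFastTokenizer:
    def __init__(self, vocab_file=None):
        self.vocab_file = vocab_file

    @property
    def can_save_slow_tokenizer(self) -> bool:
        # Re-evaluated on every access: True only if a vocab file path was
        # given AND that file still exists on disk.
        return os.path.isfile(self.vocab_file) if self.vocab_file else False


with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, "sentencepiece.bpe.model")
    open(path, "wb").close()  # create an empty stand-in vocab file

    tok = ToyFastTokenizer(vocab_file=path)
    print(tok.can_save_slow_tokenizer)  # True: the file exists

    os.remove(path)
    print(tok.can_save_slow_tokenizer)  # False: reflects current disk state

print(ToyFastTokenizer().can_save_slow_tokenizer)  # False: no vocab file given
```

With the property, callers that guard on this flag (in transformers fast tokenizers, typically `save_vocabulary`) see the current state of the file rather than whatever happened to be true at construction time.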