Skip to content

Commit 0e89b27

Browse files
committed
move special tokens list
1 parent d2e4330 commit 0e89b27

File tree

1 file changed

+11
-10
lines changed

1 file changed

+11
-10
lines changed

src/transformers/models/bartpho/tokenization_bartpho.py

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -323,17 +323,18 @@ def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] =
323 323
) and os.path.isfile(self.monolingual_vocab_file):
324 324
copyfile(self.monolingual_vocab_file, out_monolingual_vocab_file)
325 325
elif not os.path.isfile(self.monolingual_vocab_file):
326+
special_tokens = [
327+
self.bos_token,
328+
self.eos_token,
329+
self.sep_token,
330+
self.cls_token,
331+
self.unk_token,
332+
self.pad_token,
333+
self.mask_token,
334+
]
326 335
with open(out_monolingual_vocab_file, "w", encoding="utf-8") as fp:
327 336
for token in self.fairseq_tokens_to_ids:
328-
if token not in [
329-
str(self.bos_token),
330-
str(self.eos_token),
331-
str(self.sep_token),
332-
str(self.cls_token),
333-
str(self.unk_token),
334-
str(self.pad_token),
335-
str(self.mask_token),
336-
]:
337-
fp.write(f"{token} \n")
337+
if token not in special_tokens:
338+
fp.write(f"{str(token)} \n")
338 339

339 340
return out_vocab_file, out_monolingual_vocab_file

0 commit comments

Comments
 (0)