Skip to content

Commit 26f8b2c

Browse files
authored
Make Barthez tokenizer tests a bit faster (huggingface#10399)
* Make Barthez tokenizer tests a bit faster * Quality
1 parent b040e6e commit 26f8b2c

File tree

4 files changed

+6
-8
lines changed

4 files changed

+6
-8
lines changed

tests/test_tokenization_barthez.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,9 @@ class BarthezTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
33 33
def setUp(self):
3434
super().setUp()
3535

36-
tokenizer = BarthezTokenizer.from_pretrained("moussaKam/mbarthez")
36+
tokenizer = BarthezTokenizerFast.from_pretrained("moussaKam/mbarthez")
3737
tokenizer.save_pretrained(self.tmpdirname)
38+
tokenizer.save_pretrained(self.tmpdirname, legacy_format=False)
3839
self.tokenizer = tokenizer
3940

4041
@require_torch

tests/test_tokenization_common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ def test_tokenizer_fast_store_full_signature(self):
238238
tokenizer = self.get_rust_tokenizer()
239239

240240
for parameter_name, parameter in signature.parameters.items():
241-
if parameter.default != inspect.Parameter.empty:
241+
if parameter.default != inspect.Parameter.empty and parameter_name != "tokenizer_file":
242242
self.assertIn(parameter_name, tokenizer.init_kwargs)
243243

244244
def test_rust_and_python_full_tokenizers(self):

tests/test_tokenization_mbart.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,17 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import os
1516
import tempfile
1617
import unittest
1718

1819
from transformers import SPIECE_UNDERLINE, BatchEncoding, MBartTokenizer, MBartTokenizerFast, is_torch_available
19-
from transformers.file_utils import is_sentencepiece_available
2020
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch
2121

2222
from .test_tokenization_common import TokenizerTesterMixin
2323

2424

25-
if is_sentencepiece_available():
26-
from .test_tokenization_xlm_roberta import SAMPLE_VOCAB
25+
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
2726

2827

2928
if is_torch_available():

tests/test_tokenization_mbart50.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,12 @@
1717
import unittest
1818

1919
from transformers import SPIECE_UNDERLINE, BatchEncoding, MBart50Tokenizer, MBart50TokenizerFast, is_torch_available
20-
from transformers.file_utils import is_sentencepiece_available
2120
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch
2221

2322
from .test_tokenization_common import TokenizerTesterMixin
2423

2524

26-
if is_sentencepiece_available():
27-
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
25+
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
2826

2927

3028
if is_torch_available():

0 commit comments

Comments (0)