Skip to content

Commit 26f8b2c

Browse files
authored
Make Barthez tokenizer tests a bit faster (huggingface#10399)
* Make Barthez tokenizer tests a bit faster * Quality
1 parent b040e6e commit 26f8b2c

File tree

4 files changed

+6
-8
lines changed

4 files changed

+6
-8
lines changed

tests/test_tokenization_barthez.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,9 @@ class BarthezTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
33 33
def setUp(self):
3434
super().setUp()
3535

36-
tokenizer = BarthezTokenizer.from_pretrained("moussaKam/mbarthez")
36+
tokenizer = BarthezTokenizerFast.from_pretrained("moussaKam/mbarthez")
3737
tokenizer.save_pretrained(self.tmpdirname)
38+
tokenizer.save_pretrained(self.tmpdirname, legacy_format=False)
3839
self.tokenizer = tokenizer
3940

4041
@require_torch

tests/test_tokenization_common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ def test_tokenizer_fast_store_full_signature(self):
238238
tokenizer = self.get_rust_tokenizer()
239239

240240
for parameter_name, parameter in signature.parameters.items():
241-
if parameter.default != inspect.Parameter.empty:
241+
if parameter.default != inspect.Parameter.empty and parameter_name != "tokenizer_file":
242242
self.assertIn(parameter_name, tokenizer.init_kwargs)
243243

244244
def test_rust_and_python_full_tokenizers(self):

tests/test_tokenization_mbart.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,17 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import os
1516
import tempfile
1617
import unittest
1718

1819
from transformers import SPIECE_UNDERLINE, BatchEncoding, MBartTokenizer, MBartTokenizerFast, is_torch_available
19-
from transformers.file_utils import is_sentencepiece_available
2020
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch
2121

2222
from .test_tokenization_common import TokenizerTesterMixin
2323

2424

25-
if is_sentencepiece_available():
26-
from .test_tokenization_xlm_roberta import SAMPLE_VOCAB
25+
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
2726

2827

2928
if is_torch_available():

tests/test_tokenization_mbart50.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,12 @@
1717
import unittest
1818

1919
from transformers import SPIECE_UNDERLINE, BatchEncoding, MBart50Tokenizer, MBart50TokenizerFast, is_torch_available
20-
from transformers.file_utils import is_sentencepiece_available
2120
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch
2221

2322
from .test_tokenization_common import TokenizerTesterMixin
2423

2524

26-
if is_sentencepiece_available():
27-
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
25+
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
2826

2927

3028
if is_torch_available():

0 commit comments

Comments (0)