Check the sequence-length warning for pairs of inputs too.

Narsil · Narsil · commit 806d03332f28 · 2020-11-26T12:27:03.000+01:00
diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py
@@ -787,11 +787,28 @@ def test_maximum_encoding_length_pair_input(self):
                         self.assertEqual(len(output["input_ids"][0]), model_max_length)
 
                         # Simple with no truncation
-                        output = tokenizer(seq_1, seq_2, padding=padding_state, truncation=False)
-                        self.assertNotEqual(len(output["input_ids"]), model_max_length)
+                        # Reset warnings
+                        tokenizer.deprecation_warnings = {}
+                        with self.assertLogs("transformers", level="WARNING") as cm:
+                            output = tokenizer(seq_1, seq_2, padding=padding_state, truncation=False)
+                            self.assertNotEqual(len(output["input_ids"]), model_max_length)
+                        self.assertEqual(len(cm.records), 1)
+                        self.assertTrue(
+                            cm.records[0].message.startswith(
+                                "Token indices sequence length is longer than the specified maximum sequence length for this model"
+                            )
+                        )
 
-                        output = tokenizer([seq_1], [seq_2], padding=padding_state, truncation=False)
-                        self.assertNotEqual(len(output["input_ids"][0]), model_max_length)
+                        tokenizer.deprecation_warnings = {}
+                        with self.assertLogs("transformers", level="WARNING") as cm:
+                            output = tokenizer([seq_1], [seq_2], padding=padding_state, truncation=False)
+                            self.assertNotEqual(len(output["input_ids"][0]), model_max_length)
+                        self.assertEqual(len(cm.records), 1)
+                        self.assertTrue(
+                            cm.records[0].message.startswith(
+                                "Token indices sequence length is longer than the specified maximum sequence length for this model"
+                            )
+                        )
 
                 truncated_first_sequence = tokenizer.encode(seq_0, add_special_tokens=False)[:-2] + tokenizer.encode(
                     seq_1, add_special_tokens=False