
Commit b8458ad (1 parent: 806d033)

Making the function private and adding its doc.

2 files changed (+17, -9)


src/transformers/tokenization_utils_base.py (15 additions, 7 deletions)
@@ -2853,7 +2853,7 @@ def prepare_for_model(
             encoded_inputs["special_tokens_mask"] = [0] * len(sequence)
 
         # Check lengths
-        self.eventual_warn_about_too_long_sequence(encoded_inputs["input_ids"], max_length, verbose)
+        self._eventual_warn_about_too_long_sequence(encoded_inputs["input_ids"], max_length, verbose)
 
         # Padding
         if padding_strategy != PaddingStrategy.DO_NOT_PAD or return_attention_mask:
@@ -3163,13 +3163,11 @@ def get_special_tokens_mask(
     @staticmethod
     def clean_up_tokenization(out_string: str) -> str:
         """
-        Clean up a list of simple English tokenization artifacts like spaces before punctuations and abbreviated forms.
+        clean up a list of simple english tokenization artifacts like spaces before punctuations and abbreviated forms.
 
-        Args:
-            out_string (:obj:`str`): The text to clean up.
+        args: out_string (:obj:`str`): the text to clean up.
 
-        Returns:
-            :obj:`str`: The cleaned-up string.
+        returns: :obj:`str`: the cleaned-up string.
         """
         out_string = (
             out_string.replace(" .", ".")
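As its docstring says, clean_up_tokenization strips simple detokenization artifacts such as spaces before punctuation and split contractions. A quick illustrative call (the input string is made up, and the expected output in the comment assumes the usual replacement rules such as " ." -> "." and " n't" -> "n't"):

from transformers import PreTrainedTokenizerBase

# clean_up_tokenization is a staticmethod, so it can be called on the class itself.
cleaned = PreTrainedTokenizerBase.clean_up_tokenization("hello , world ! do n't stop .")
print(cleaned)  # hello, world! don't stop.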
@@ -3185,7 +3183,17 @@ def clean_up_tokenization(out_string: str) -> str:
         )
         return out_string
 
-    def eventual_warn_about_too_long_sequence(self, ids: List[int], max_length: Optional[int], verbose: bool):
+    def _eventual_warn_about_too_long_sequence(self, ids: List[int], max_length: Optional[int], verbose: bool):
+        """
+        Depending on the input and internal state we might trigger a warning about a sequence that is too long for its
+        corresponding model.
+
+        Args:
+            ids (:obj:`List[int]`): The ids produced by the tokenization
+            max_length (:obj:`int`, `optional`): The max_length desired (does not trigger a warning if it is set)
+            verbose (:obj:`bool`): Whether or not to print more information and warnings.
+
+        """
         if max_length is None and len(ids) > self.model_max_length and verbose:
             if not self.deprecation_warnings.get("sequence-length-is-longer-than-the-specified-maximum", False):
                 logger.warning(
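The new docstring matches the gating visible in the context lines: the warning fires only when no explicit max_length is passed, the sequence exceeds model_max_length, and verbose is set, and the deprecation_warnings dict ensures it fires at most once. A minimal self-contained sketch of that behavior (the TokenizerSketch class and warning text are stand-ins, not the upstream implementation):

import logging
from typing import List, Optional

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)


class TokenizerSketch:
    # Hypothetical stand-in for PreTrainedTokenizerBase, only to show the gating.
    def __init__(self, model_max_length: int = 512):
        self.model_max_length = model_max_length
        self.deprecation_warnings = {}  # tracks warnings already emitted

    def _eventual_warn_about_too_long_sequence(self, ids: List[int], max_length: Optional[int], verbose: bool):
        # Warn only if: no explicit max_length, sequence longer than the model
        # limit, verbosity on, and this warning not already emitted.
        if max_length is None and len(ids) > self.model_max_length and verbose:
            if not self.deprecation_warnings.get("sequence-length-is-longer-than-the-specified-maximum", False):
                logger.warning(
                    "Token indices sequence length (%d) exceeds the maximum (%d); "
                    "running it through the model may cause indexing errors.",
                    len(ids),
                    self.model_max_length,
                )
            self.deprecation_warnings["sequence-length-is-longer-than-the-specified-maximum"] = True


tok = TokenizerSketch(model_max_length=4)
tok._eventual_warn_about_too_long_sequence([1] * 10, max_length=None, verbose=True)  # warns once
tok._eventual_warn_about_too_long_sequence([1] * 10, max_length=None, verbose=True)  # silent: already warned
tok._eventual_warn_about_too_long_sequence([1] * 10, max_length=8, verbose=True)     # silent: max_length was given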

src/transformers/tokenization_utils_fast.py (2 additions, 2 deletions)
@@ -419,7 +419,7 @@ def _batch_encode_plus(
         sanitized_tokens["overflow_to_sample_mapping"] = overflow_to_sample_mapping
 
         for input_ids in sanitized_tokens["input_ids"]:
-            self.eventual_warn_about_too_long_sequence(input_ids, max_length, verbose)
+            self._eventual_warn_about_too_long_sequence(input_ids, max_length, verbose)
         return BatchEncoding(sanitized_tokens, sanitized_encodings, tensor_type=return_tensors)
 
     def _encode_plus(
@@ -476,7 +476,7 @@ def _encode_plus(
             batched_output.encodings,
         )
 
-        self.eventual_warn_about_too_long_sequence(batched_output["input_ids"], max_length, verbose)
+        self._eventual_warn_about_too_long_sequence(batched_output["input_ids"], max_length, verbose)
 
         return batched_output
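Both call sites here are internal, so the rename does not change the public API: callers of a fast tokenizer trigger the warning the same way as before. A hedged usage sketch (the checkpoint name is just an example, assumed to have model_max_length=512):

from transformers import AutoTokenizer

# Any fast-tokenizer checkpoint works; "bert-base-uncased" is only an example.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", use_fast=True)

# Without max_length, ~2000 tokens exceed model_max_length (512), so the
# private helper logs the too-long-sequence warning (once).
long_text = "hello " * 2000
encoding = tokenizer(long_text)

# With an explicit max_length and truncation, the helper stays silent.
encoding = tokenizer(long_text, max_length=512, truncation=True)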
