@@ -787,11 +787,28 @@ def test_maximum_encoding_length_pair_input(self):
787
787
self .assertEqual (len (output ["input_ids" ][0 ]), model_max_length )
788
788
789
789
# Simple with no truncation
790
- output = tokenizer (seq_1 , seq_2 , padding = padding_state , truncation = False )
791
- self .assertNotEqual (len (output ["input_ids" ]), model_max_length )
790
+ # Reset warnings
791
+ tokenizer .deprecation_warnings = {}
792
+ with self .assertLogs ("transformers" , level = "WARNING" ) as cm :
793
+ output = tokenizer (seq_1 , seq_2 , padding = padding_state , truncation = False )
794
+ self .assertNotEqual (len (output ["input_ids" ]), model_max_length )
795
+ self .assertEqual (len (cm .records ), 1 )
796
+ self .assertTrue (
797
+ cm .records [0 ].message .startswith (
798
+ "Token indices sequence length is longer than the specified maximum sequence length for this model"
799
+ )
800
+ )
792
801
793
- output = tokenizer ([seq_1 ], [seq_2 ], padding = padding_state , truncation = False )
794
- self .assertNotEqual (len (output ["input_ids" ][0 ]), model_max_length )
802
+ tokenizer .deprecation_warnings = {}
803
+ with self .assertLogs ("transformers" , level = "WARNING" ) as cm :
804
+ output = tokenizer ([seq_1 ], [seq_2 ], padding = padding_state , truncation = False )
805
+ self .assertNotEqual (len (output ["input_ids" ][0 ]), model_max_length )
806
+ self .assertEqual (len (cm .records ), 1 )
807
+ self .assertTrue (
808
+ cm .records [0 ].message .startswith (
809
+ "Token indices sequence length is longer than the specified maximum sequence length for this model"
810
+ )
811
+ )
795
812
796
813
truncated_first_sequence = tokenizer .encode (seq_0 , add_special_tokens = False )[:- 2 ] + tokenizer .encode (
797
814
seq_1 , add_special_tokens = False
0 commit comments