Skip to content

Commit 04e9a6f

Browse files
authored
Merge pull request huggingface#1359 from dennymarcels/patch-1
Update run_lm_finetuning.py
2 parents ca55982 + 9478590 commit 04e9a6f

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

examples/run_lm_finetuning.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ def __init__(self, tokenizer, file_path='train', block_size=512):
7575
tokenized_text = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))
7676

7777
for i in range(0, len(tokenized_text)-block_size+1, block_size): # Truncate in block of block_size
78-
self.examples.append(tokenizer.add_special_tokens_single_sentence(tokenized_text[i:i+block_size]))
78+
self.examples.append(tokenizer.add_special_tokens_single_sequence(tokenized_text[i:i+block_size]))
7979
# Note that we are loosing the last truncated example here for the sake of simplicity (no padding)
8080
# If your dataset is small, first you should loook for a bigger one :-) and second you
8181
# can change this behavior by adding (model specific) padding.

0 commit comments

Comments
 (0)