Skip to content

Commit 04e9a6f

Browse files
authored
Merge pull request huggingface#1359 from dennymarcels/patch-1
Update run_lm_finetuning.py
2 parents ca55982 + 9478590 commit 04e9a6f

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

examples/run_lm_finetuning.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ def __init__(self, tokenizer, file_path='train', block_size=512):
7575
tokenized_text = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))
7676

7777
for i in range(0, len(tokenized_text)-block_size+1, block_size): # Truncate in block of block_size
78-
self.examples.append(tokenizer.add_special_tokens_single_sentence(tokenized_text[i:i+block_size]))
78+
self.examples.append(tokenizer.add_special_tokens_single_sequence(tokenized_text[i:i+block_size]))
7979
# Note that we are loosing the last truncated example here for the sake of simplicity (no padding)
8080
# If your dataset is small, first you should loook for a bigger one :-) and second you
8181
# can change this behavior by adding (model specific) padding.

0 commit comments

Comments
 (0)