-
-
Notifications
You must be signed in to change notification settings - Fork 31.8k
gh-76578: Fix textwrap.wrap() so it's stable if run twice. #5615
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -475,6 +475,30 @@ def test_narrow_non_breaking_space(self): | |
'non-breaking\N{NARROW NO-BREAK SPACE}space.'], | ||
break_on_hyphens=False) | ||
|
||
def test_wrap_stability(self): | ||
def wrap(s): | ||
return fill(s, width=30) | ||
|
||
# original is 31 characters long: | ||
# 0 1 2 3 | ||
# 1234567890123456789012345678901 | ||
original = "xxxx xxxx xxxx xxxx xxxx. xxxx" | ||
wrapped = wrap(original) | ||
wrapped2 = wrap(wrapped) | ||
self.assertEqual(wrapped, wrapped2) | ||
|
||
def test_wrap_stability_with_fix_sentence_endings(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems this case is already covered by test_fix_sentence_endings. test_fix_sentence_endings fails if disable the correction for fix_sentence_endings. |
||
def wrap(s): | ||
return fill(s, width=30, fix_sentence_endings=True) | ||
|
||
# original is 31 characters long: | ||
# 0 1 2 3 | ||
# 1234567890123456789012345678901 | ||
original = "xxx xxxx xxxx xxxx xxxx. xxxx" | ||
wrapped = wrap(original) | ||
wrapped2 = wrap(wrapped) | ||
self.assertEqual(wrapped, wrapped2) | ||
|
||
|
||
class MaxLinesTestCase(BaseTestCase): | ||
text = "Hello there, how are you this fine day? I'm glad to hear it!" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -159,15 +159,17 @@ def _split(self, text): | |
|
||
Split the text to wrap into indivisible chunks. Chunks are | ||
not quite the same as words; see _wrap_chunks() for full | ||
details. As an example, the text | ||
details. | ||
|
||
As an example, the text | ||
Look, goof-ball -- use the -b option! | ||
breaks into the following chunks: | ||
breaks into the following chunks if break_on_hyphens is True: | ||
'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ', | ||
'use', ' ', 'the', ' ', '-b', ' ', 'option!' | ||
if break_on_hyphens is True, or in: | ||
|
||
If break_on_hyphens is False, it instead breaks into these chunks: | ||
'Look,', ' ', 'goof-ball', ' ', '--', ' ', | ||
'use', ' ', 'the', ' ', '-b', ' ', option!' | ||
otherwise. | ||
""" | ||
if self.break_on_hyphens is True: | ||
chunks = self.wordsep_re.split(text) | ||
|
@@ -296,6 +298,35 @@ def _wrap_chunks(self, chunks): | |
else: | ||
break | ||
|
||
# If the last chunk on this line is all whitespace, | ||
# and drop_whitespace is on, | ||
# see if substituting a one-space chunk here leaves enough | ||
# room to add the next non-whitespace chunk to the end. | ||
# This fixes bpo-32397, where running wrap() twice on | ||
# a paragraph might not be stable--the second run might | ||
# produce different results than the first. (If the first | ||
# wrap() turned "foo. bar" into "foo.\nbar", the second | ||
# wrap() might turn it back into "foo. bar". In that | ||
# scenario, wrap() wil now produce "foo. bar".) | ||
# | ||
# This is all complicated slightly by fix_sentence_endings, | ||
# where the chunk we add back in might need to be two spaces | ||
# instead of one. | ||
if (chunks and self.drop_whitespace | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. According to the doc, drop_whitespace means "whitespace at the beginning and ending of every line (after wrapping but before indenting) is dropped". But when you do below I see what you're trying to solve here, but I think that the definition of drop_whitespace, if I understand it correctly, makes wrap inherently non-idempotent. |
||
and cur_line and cur_line[-1].strip() == ''): | ||
spacer = " " | ||
if (self.fix_sentence_endings | ||
and (len(cur_line) > 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this would look better without parenthesis. |
||
and self.sentence_end_re.search(cur_line[-2])): | ||
spacer = " " | ||
new_len = cur_len - len(cur_line[-1]) + len(spacer) + len(chunks[-1]) | ||
if new_len <= width: | ||
cur_line.pop() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would write this as cur_line[-1] = spacer
cur_line.append(chunks.pop()) but this is a matter of style. |
||
cur_line.append(spacer) | ||
cur_line.append(chunks[-1]) | ||
chunks.pop() | ||
cur_len = new_len | ||
|
||
# The current line is full, and the next chunk is too big to | ||
# fit on *any* line (not just this one). | ||
if chunks and len(chunks[-1]) > width: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
Fix ``textwrap.wrap()`` so it's stable. In certain fiddly circumstances, | ||
``textwrap.wrap(x)`` wasn't the same as ``textwrap.wrap(textwrap.wrap(x))``, | ||
which was surprising. This happened when a line was wrapped at a whitespace | ||
blob that was longer than 1 character, but the following word *would* have | ||
fit if that whitespace blob was only 1 character long. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please use more meaningful example? If the test will fail it more convenient to see different words in the report rather of repeated 'xxxx'.