Skip to content

bpo-12499: support custom len function in textwrap.wrap #28136

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
8 changes: 8 additions & 0 deletions Doc/library/textwrap.rst
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,14 @@ hyphenated words; only then will long words be broken if necessary, unless
.. versionadded:: 3.4


.. attribute:: text_len

(default: ``len``) A callable that takes a string and returns its width as
an integer; it is used wherever the wrapper measures text. You can provide a
custom function, e.g. to account for wide characters.

.. versionadded:: 3.11


.. index:: single: ...; placeholder

.. attribute:: placeholder
Expand Down
2 changes: 1 addition & 1 deletion Lib/idlelib/idle_test/test_calltip.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def test_signature_wrap(self):
(width=70, initial_indent='', subsequent_indent='', expand_tabs=True,
replace_whitespace=True, fix_sentence_endings=False, break_long_words=True,
drop_whitespace=True, break_on_hyphens=True, tabsize=8, *, max_lines=None,
placeholder=' [...]')
placeholder=' [...]', text_len=<built-in function len>)
Object for wrapping/filling text. The public interface consists of
the wrap() and fill() methods; the other methods are just there for
subclasses to override in order to tweak the default behaviour.
Expand Down
68 changes: 68 additions & 0 deletions Lib/test/test_textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#

import unittest
import unicodedata

from textwrap import TextWrapper, wrap, fill, dedent, indent, shorten

Expand Down Expand Up @@ -1076,5 +1077,72 @@ def test_first_word_too_long_but_placeholder_fits(self):
self.check_shorten("Helloo", 5, "[...]")


class WideCharacterTestCase(BaseTestCase):
    """Exercise wrap() and shorten() with a width-aware text_len.

    Each test first pins the default behaviour (every character counts as
    one column) and then repeats the call with self.text_len, under which
    the expectations treat the wrench emoji as two columns wide.
    """

    def text_len(self, text):
        """Return the width of text, counting 'F' and 'W' characters as 2."""
        return sum(
            2 if unicodedata.east_asian_width(c) in {'F', 'W'} else 1
            for c in text
        )

    def check_shorten(self, text, width, expect, **kwargs):
        """Run shorten() and compare its result with expect via self.check."""
        result = shorten(text, width, **kwargs)
        self.check(result, expect)

    def test_wrap(self):
        text = "123 🔧"
        # Five characters fit in width 5 when every character counts as 1.
        self.check_wrap(text, 5, ["123 🔧"])
        self.check_wrap(text, 5, ["123", "🔧"], text_len=self.text_len)

    def test_wrap_initial_indent(self):
        text = "12 12"
        # The indent itself is measured with text_len as well.
        self.check_wrap(text, 6, ["🔧12 12"], initial_indent="🔧")
        self.check_wrap(text, 6, ["🔧12", "12"], initial_indent="🔧",
                        text_len=self.text_len)

    def test_wrap_subsequent_indent(self):
        text = "12 12 12 12"
        self.check_wrap(text, 6, ["12 12", "🔧12 12"], subsequent_indent="🔧")
        self.check_wrap(text, 6, ["12 12", "🔧12", "🔧12"],
                        subsequent_indent="🔧", text_len=self.text_len)

    def test_shorten(self):
        text = "123 1234🔧"
        self.check_shorten(text, 9, "123 1234🔧")
        self.check_shorten(text, 9, "123 [...]", text_len=self.text_len)

    def test_shorten_placeholder(self):
        text = "123 1 123"
        # The placeholder is measured with text_len too.
        self.check_shorten(text, 7, "123 1 🔧", placeholder=" 🔧")
        self.check_shorten(text, 7, "123 🔧", placeholder=" 🔧",
                           text_len=self.text_len)


class CustomWidthTestCase(BaseTestCase):
    """Wrapping with a text_len that gives some characters arbitrary widths."""

    def text_len(self, text):
        """Return the width of text: 'A' is 4, 'B' is 2, 'Q' is 0, others 1."""
        special_widths = {'A': 4, 'B': 2, 'Q': 0}
        total = 0
        for char in text:
            total += special_widths.get(char, 1)
        return total

    def test_zero_width_text_len(self):
        # 'Q' contributes no width, so ten characters fit in six columns.
        wrapped = ["0QQ1234QQ5", "6789"]
        self.check_wrap("0QQ1234QQ56789", 6, wrapped, text_len=self.text_len)

    def test_char_longer_than_width(self):
        # A single 'A' (width 4) exceeds the line width of 3 on its own.
        wrapped = ["A", "A", "012", "3"]
        self.check_wrap("AA0123", 3, wrapped, text_len=self.text_len)

    def test_next_char_overflow(self):
        # One 'B' (width 2) fits in 3 columns, but a second one does not.
        wrapped = ["B", "B0", "123"]
        self.check_wrap("BB0123", 3, wrapped, text_len=self.text_len)


if __name__ == '__main__':
unittest.main()
49 changes: 36 additions & 13 deletions Lib/textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ def __init__(self,
tabsize=8,
*,
max_lines=None,
placeholder=' [...]'):
placeholder=' [...]',
text_len=len):
self.width = width
self.initial_indent = initial_indent
self.subsequent_indent = subsequent_indent
Expand All @@ -138,6 +139,7 @@ def __init__(self,
self.tabsize = tabsize
self.max_lines = max_lines
self.placeholder = placeholder
self.text_len = text_len


# -- Private methods -----------------------------------------------
Expand Down Expand Up @@ -197,6 +199,25 @@ def _fix_sentence_endings(self, chunks):
else:
i += 1

def _find_width_index(self, text, width):
    """_find_width_index(text : string, width : int) -> int

    Return the index at which 'text' should be cut so that the leading
    slice fits within 'width' as measured by self.text_len.  With a
    custom text_len this index need not equal 'width', because a single
    character may be counted as wider (or narrower) than one column.

    If the very first character is already wider than 'width', 1 is
    returned so that a caller splitting a long word still consumes a
    character.  If no prefix of 'text' exceeds 'width', len(text) is
    returned.
    """
    text_len = self.text_len
    # Fast path: when the first 'width' characters measure exactly
    # 'width' (true for the default len whenever the text is at least
    # that long), no scan is needed.
    if text_len(text[:width]) == width:
        # With characters wider than one column, 'width' can be more
        # than the number of characters in the text.
        return min(width, len(text))
    # Measure growing prefixes instead of summing per-character widths:
    # a custom text_len is not guaranteed to be additive.
    for end in range(1, len(text) + 1):
        if text_len(text[:end]) > width:
            # The previous prefix, text[:end - 1], still fit.
            return max(end - 1, 1)
    # Nothing exceeded 'width': cut at the end of the text, returning an
    # explicit index rather than an implicit None.
    return len(text)

def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
"""_handle_long_word(chunks : [string],
cur_line : [string],
Expand All @@ -215,12 +236,12 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
# If we're allowed to break long words, then do so: put as much
# of the next chunk onto the current line as will fit.
if self.break_long_words:
end = space_left
chunk = reversed_chunks[-1]
if self.break_on_hyphens and len(chunk) > space_left:
end = self._find_width_index(chunk, space_left)
if self.break_on_hyphens and self.text_len(chunk) > space_left:
# break after last hyphen, but only if there are
# non-hyphens before it
hyphen = chunk.rfind('-', 0, space_left)
hyphen = chunk.rfind('-', 0, end)
if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]):
end = hyphen + 1
cur_line.append(chunk[:end])
Expand Down Expand Up @@ -259,7 +280,8 @@ def _wrap_chunks(self, chunks):
indent = self.subsequent_indent
else:
indent = self.initial_indent
if len(indent) + len(self.placeholder.lstrip()) > self.width:
if (self.text_len(indent) +
self.text_len(self.placeholder.lstrip()) > self.width):
raise ValueError("placeholder too large for max width")

# Arrange in reverse order so items can be efficiently popped
Expand All @@ -280,15 +302,15 @@ def _wrap_chunks(self, chunks):
indent = self.initial_indent

# Maximum width for this line.
width = self.width - len(indent)
width = self.width - self.text_len(indent)

# First chunk on line is whitespace -- drop it, unless this
# is the very beginning of the text (ie. no lines started yet).
if self.drop_whitespace and chunks[-1].strip() == '' and lines:
del chunks[-1]

while chunks:
l = len(chunks[-1])
l = self.text_len(chunks[-1])

# Can at least squeeze this chunk onto the current line.
if cur_len + l <= width:
Expand All @@ -301,13 +323,13 @@ def _wrap_chunks(self, chunks):

# The current line is full, and the next chunk is too big to
# fit on *any* line (not just this one).
if chunks and len(chunks[-1]) > width:
if chunks and self.text_len(chunks[-1]) > width:
self._handle_long_word(chunks, cur_line, cur_len, width)
cur_len = sum(map(len, cur_line))
cur_len = sum(map(self.text_len, cur_line))

# If the last chunk on this line is all whitespace, drop it.
if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
cur_len -= len(cur_line[-1])
cur_len -= self.text_len(cur_line[-1])
del cur_line[-1]

if cur_line:
Expand All @@ -323,16 +345,17 @@ def _wrap_chunks(self, chunks):
else:
while cur_line:
if (cur_line[-1].strip() and
cur_len + len(self.placeholder) <= width):
cur_len + self.text_len(self.placeholder) <= width):
cur_line.append(self.placeholder)
lines.append(indent + ''.join(cur_line))
break
cur_len -= len(cur_line[-1])
cur_len -= self.text_len(cur_line[-1])
del cur_line[-1]
else:
if lines:
prev_line = lines[-1].rstrip()
if (len(prev_line) + len(self.placeholder) <=
if (self.text_len(prev_line) +
self.text_len(self.placeholder) <=
self.width):
lines[-1] = prev_line + self.placeholder
break
Expand Down