From 31cf38c633b2d729b1d950003077e1ea8ab35269 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 13 Aug 2025 02:40:01 +0100 Subject: [PATCH 1/3] Move textwrap utilities to ``blurb._utils.text`` --- pyproject.toml | 3 ++ src/blurb/_blurb_file.py | 3 +- src/blurb/_merge.py | 3 +- src/blurb/_utils/__init__.py | 0 src/blurb/_utils/text.py | 99 ++++++++++++++++++++++++++++++++++++ src/blurb/blurb.py | 89 -------------------------------- tests/test_blurb.py | 43 ---------------- tests/test_utils_text.py | 45 ++++++++++++++++ 8 files changed, 151 insertions(+), 134 deletions(-) create mode 100644 src/blurb/_utils/__init__.py create mode 100644 src/blurb/_utils/text.py create mode 100644 tests/test_utils_text.py diff --git a/pyproject.toml b/pyproject.toml index 5bac726..9ccd1f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,9 @@ urls.Changelog = "https://github.com/python/blurb/blob/main/CHANGELOG.md" urls.Homepage = "https://github.com/python/blurb" urls.Source = "https://github.com/python/blurb" scripts.blurb = "blurb._cli:main" +dependencies = [ + "pytest>=8.4.1", +] [tool.hatch] version.source = "vcs" diff --git a/src/blurb/_blurb_file.py b/src/blurb/_blurb_file.py index fbd1cf5..d0c0df4 100644 --- a/src/blurb/_blurb_file.py +++ b/src/blurb/_blurb_file.py @@ -83,7 +83,8 @@ import re from blurb._template import sanitize_section, sections, unsanitize_section -from blurb.blurb import BlurbError, textwrap_body, sortable_datetime, nonceify +from blurb._utils.text import textwrap_body +from blurb.blurb import BlurbError, sortable_datetime, nonceify root = None # Set by chdir_to_repo_root() lowest_possible_gh_issue_number = 32426 diff --git a/src/blurb/_merge.py b/src/blurb/_merge.py index ab26a3e..aa9aaee 100644 --- a/src/blurb/_merge.py +++ b/src/blurb/_merge.py @@ -4,8 +4,9 @@ from blurb._blurb_file import Blurbs from blurb._cli import require_ok, subcommand +from blurb._utils.text import textwrap_body from 
blurb._versions import glob_versions, printable_version -from blurb.blurb import glob_blurbs, textwrap_body +from blurb.blurb import glob_blurbs original_dir: str = os.getcwd() diff --git a/src/blurb/_utils/__init__.py b/src/blurb/_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/blurb/_utils/text.py b/src/blurb/_utils/text.py new file mode 100644 index 0000000..b5b7d02 --- /dev/null +++ b/src/blurb/_utils/text.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +import itertools +import textwrap + +TYPE_CHECKING = False +if TYPE_CHECKING: + from collections.abc import Iterable + + +def textwrap_body(body: str | Iterable[str], *, subsequent_indent: str = '') -> str: + """Wrap body text. + + Accepts either a string or an iterable of strings. + (Iterable is assumed to be individual lines.) + Returns a string. + """ + if isinstance(body, str): + text = body + else: + text = '\n'.join(body).rstrip() + + # textwrap merges paragraphs, ARGH + + # step 1: remove trailing whitespace from individual lines + # (this means that empty lines will just have \n, no invisible whitespace) + lines = [] + for line in text.split('\n'): + lines.append(line.rstrip()) + text = '\n'.join(lines) + # step 2: break into paragraphs and wrap those + paragraphs = text.split('\n\n') + paragraphs2 = [] + kwargs: dict[str, object] = {'break_long_words': False, 'break_on_hyphens': False} + if subsequent_indent: + kwargs['subsequent_indent'] = subsequent_indent + dont_reflow = False + for paragraph in paragraphs: + # don't reflow bulleted / numbered lists + dont_reflow = dont_reflow or paragraph.startswith(('* ', '1. ', '#. 
')) + if dont_reflow: + initial = kwargs.get('initial_indent', '') + subsequent = kwargs.get('subsequent_indent', '') + if initial or subsequent: + lines = [line.rstrip() for line in paragraph.split('\n')] + indents = itertools.chain( + itertools.repeat(initial, 1), + itertools.repeat(subsequent), + ) + lines = [indent + line for indent, line in zip(indents, lines)] + paragraph = '\n'.join(lines) + paragraphs2.append(paragraph) + else: + # Why do we reflow the text twice? Because it can actually change + # between the first and second reflows, and we want the text to + # be stable. The problem is that textwrap.wrap is deliberately + # dumb about how many spaces follow a period in prose. + # + # We're reflowing at 76 columns, but let's pretend it's 30 for + # illustration purposes. If we give textwrap.wrap the following + # text--ignore the line of 30 dashes, that's just to help you + # with visualization: + # + # ------------------------------ + # xxxx xxxx xxxx xxxx xxxx. xxxx + # + # The first textwrap.wrap will return this: + # 'xxxx xxxx xxxx xxxx xxxx.\nxxxx' + # + # If we reflow it again, textwrap will rejoin the lines, but + # only with one space after the period! So this time it'll + # all fit on one line, behold: + # ------------------------------ + # xxxx xxxx xxxx xxxx xxxx. xxxx + # and so it now returns: + # 'xxxx xxxx xxxx xxxx xxxx. xxxx' + # + # textwrap.wrap supports trying to add two spaces after a period: + # https://docs.python.org/3/library/textwrap.html#textwrap.TextWrapper.fix_sentence_endings + # But it doesn't work all that well, because it's not smart enough + # to do a really good job. + # + # Since blurbs are eventually turned into reST and rendered anyway, + # and since the Zen says 'In the face of ambiguity, refuse the + # temptation to guess', I don't sweat it. I run textwrap.wrap + # twice, so it's stable, and this means occasionally it'll + # convert two spaces to one space, no big deal. 
+ + paragraph = '\n'.join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip() + paragraph = '\n'.join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip() + paragraphs2.append(paragraph) + # don't reflow literal code blocks (I hope) + dont_reflow = paragraph.endswith('::') + if subsequent_indent: + kwargs['initial_indent'] = subsequent_indent + text = '\n\n'.join(paragraphs2).rstrip() + if not text.endswith('\n'): + text += '\n' + return text diff --git a/src/blurb/blurb.py b/src/blurb/blurb.py index 4e0082b..bd177fd 100755 --- a/src/blurb/blurb.py +++ b/src/blurb/blurb.py @@ -53,95 +53,6 @@ sanitize_section_legacy, sections, unsanitize_section, ) -def textwrap_body(body, *, subsequent_indent=''): - """ - Accepts either a string or an iterable of strings. - (Iterable is assumed to be individual lines.) - Returns a string. - """ - if isinstance(body, str): - text = body - else: - text = "\n".join(body).rstrip() - - # textwrap merges paragraphs, ARGH - - # step 1: remove trailing whitespace from individual lines - # (this means that empty lines will just have \n, no invisible whitespace) - lines = [] - for line in text.split("\n"): - lines.append(line.rstrip()) - text = "\n".join(lines) - # step 2: break into paragraphs and wrap those - paragraphs = text.split("\n\n") - paragraphs2 = [] - kwargs = {'break_long_words': False, 'break_on_hyphens': False} - if subsequent_indent: - kwargs['subsequent_indent'] = subsequent_indent - dont_reflow = False - for paragraph in paragraphs: - # don't reflow bulleted / numbered lists - dont_reflow = dont_reflow or paragraph.startswith(("* ", "1. ", "#. 
")) - if dont_reflow: - initial = kwargs.get("initial_indent", "") - subsequent = kwargs.get("subsequent_indent", "") - if initial or subsequent: - lines = [line.rstrip() for line in paragraph.split("\n")] - indents = itertools.chain( - itertools.repeat(initial, 1), - itertools.repeat(subsequent), - ) - lines = [indent + line for indent, line in zip(indents, lines)] - paragraph = "\n".join(lines) - paragraphs2.append(paragraph) - else: - # Why do we reflow the text twice? Because it can actually change - # between the first and second reflows, and we want the text to - # be stable. The problem is that textwrap.wrap is deliberately - # dumb about how many spaces follow a period in prose. - # - # We're reflowing at 76 columns, but let's pretend it's 30 for - # illustration purposes. If we give textwrap.wrap the following - # text--ignore the line of 30 dashes, that's just to help you - # with visualization: - # - # ------------------------------ - # xxxx xxxx xxxx xxxx xxxx. xxxx - # - # The first textwrap.wrap will return this: - # "xxxx xxxx xxxx xxxx xxxx.\nxxxx" - # - # If we reflow it again, textwrap will rejoin the lines, but - # only with one space after the period! So this time it'll - # all fit on one line, behold: - # ------------------------------ - # xxxx xxxx xxxx xxxx xxxx. xxxx - # and so it now returns: - # "xxxx xxxx xxxx xxxx xxxx. xxxx" - # - # textwrap.wrap supports trying to add two spaces after a peroid: - # https://docs.python.org/3/library/textwrap.html#textwrap.TextWrapper.fix_sentence_endings - # But it doesn't work all that well, because it's not smart enough - # to do a really good job. - # - # Since blurbs are eventually turned into ReST and rendered anyway, - # and since the Zen says "In the face of ambiguity, refuse the - # temptation to guess", I don't sweat it. I run textwrap.wrap - # twice, so it's stable, and this means occasionally it'll - # convert two spaces to one space, no big deal. 
- - paragraph = "\n".join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip() - paragraph = "\n".join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip() - paragraphs2.append(paragraph) - # don't reflow literal code blocks (I hope) - dont_reflow = paragraph.endswith("::") - if subsequent_indent: - kwargs['initial_indent'] = subsequent_indent - text = "\n\n".join(paragraphs2).rstrip() - if not text.endswith("\n"): - text += "\n" - return text - def sortable_datetime(): return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) diff --git a/tests/test_blurb.py b/tests/test_blurb.py index 7d23b8a..4f11726 100644 --- a/tests/test_blurb.py +++ b/tests/test_blurb.py @@ -4,49 +4,6 @@ from blurb import blurb -@pytest.mark.parametrize( - "body, subsequent_indent, expected", - ( - ( - "This is a test of the textwrap_body function with a string. It should wrap the text to 79 characters.", - "", - "This is a test of the textwrap_body function with a string. It should wrap\n" - "the text to 79 characters.\n", - ), - ( - [ - "This is a test of the textwrap_body function", - "with an iterable of strings.", - "It should wrap the text to 79 characters.", - ], - "", - "This is a test of the textwrap_body function with an iterable of strings. It\n" - "should wrap the text to 79 characters.\n", - ), - ( - "This is a test of the textwrap_body function with a string and subsequent indent.", - " ", - "This is a test of the textwrap_body function with a string and subsequent\n" - " indent.\n", - ), - ( - "This is a test of the textwrap_body function with a bullet list and subsequent indent. The list should not be wrapped.\n" - "\n" - "* Item 1\n" - "* Item 2\n", - " ", - "This is a test of the textwrap_body function with a bullet list and\n" - " subsequent indent. 
The list should not be wrapped.\n" - "\n" - " * Item 1\n" - " * Item 2\n", - ), - ), -) -def test_textwrap_body(body, subsequent_indent, expected): - assert blurb.textwrap_body(body, subsequent_indent=subsequent_indent) == expected - - @time_machine.travel("2025-01-07 16:28:41") def test_sortable_datetime(): assert blurb.sortable_datetime() == "2025-01-07-16-28-41" diff --git a/tests/test_utils_text.py b/tests/test_utils_text.py new file mode 100644 index 0000000..831a649 --- /dev/null +++ b/tests/test_utils_text.py @@ -0,0 +1,45 @@ +import pytest +from blurb._utils.text import textwrap_body + + +@pytest.mark.parametrize( + "body, subsequent_indent, expected", + ( + ( + "This is a test of the textwrap_body function with a string. It should wrap the text to 79 characters.", + "", + "This is a test of the textwrap_body function with a string. It should wrap\n" + "the text to 79 characters.\n", + ), + ( + [ + "This is a test of the textwrap_body function", + "with an iterable of strings.", + "It should wrap the text to 79 characters.", + ], + "", + "This is a test of the textwrap_body function with an iterable of strings. It\n" + "should wrap the text to 79 characters.\n", + ), + ( + "This is a test of the textwrap_body function with a string and subsequent indent.", + " ", + "This is a test of the textwrap_body function with a string and subsequent\n" + " indent.\n", + ), + ( + "This is a test of the textwrap_body function with a bullet list and subsequent indent. The list should not be wrapped.\n" + "\n" + "* Item 1\n" + "* Item 2\n", + " ", + "This is a test of the textwrap_body function with a bullet list and\n" + " subsequent indent. 
The list should not be wrapped.\n" + "\n" + " * Item 1\n" + " * Item 2\n", + ), + ), +) +def test_textwrap_body(body, subsequent_indent, expected): + assert textwrap_body(body, subsequent_indent=subsequent_indent) == expected From 81f2b84f174d1209394ff729d6ec2ad09929e336 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 13 Aug 2025 02:41:13 +0100 Subject: [PATCH 2/3] remove unused imports --- src/blurb/blurb.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/blurb/blurb.py b/src/blurb/blurb.py index bd177fd..4357bd9 100755 --- a/src/blurb/blurb.py +++ b/src/blurb/blurb.py @@ -42,15 +42,13 @@ import base64 import glob import hashlib -import itertools import os import sys -import textwrap import time from blurb._template import ( next_filename_unsanitize_sections, sanitize_section, - sanitize_section_legacy, sections, unsanitize_section, + sanitize_section_legacy, sections, ) def sortable_datetime(): From 5d12549dfa235abfd12fe04be2b102719e1ab826 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 13 Aug 2025 02:42:37 +0100 Subject: [PATCH 3/3] fixup! Move textwrap utilities to ``blurb._utils.text`` --- pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9ccd1f5..5bac726 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,9 +40,6 @@ urls.Changelog = "https://github.com/python/blurb/blob/main/CHANGELOG.md" urls.Homepage = "https://github.com/python/blurb" urls.Source = "https://github.com/python/blurb" scripts.blurb = "blurb._cli:main" -dependencies = [ - "pytest>=8.4.1", -] [tool.hatch] version.source = "vcs"