Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/blurb/_blurb_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@
import re

from blurb._template import sanitize_section, sections, unsanitize_section
from blurb.blurb import BlurbError, textwrap_body, sortable_datetime, nonceify
from blurb._utils.text import textwrap_body
from blurb.blurb import BlurbError, sortable_datetime, nonceify

# Module-level state: stays None until chdir_to_repo_root() assigns it.
root = None # Set by chdir_to_repo_root()
# NOTE(review): presumably the smallest GitHub issue number treated as valid;
# confirm against the code that reads this constant.
lowest_possible_gh_issue_number = 32426
Expand Down
3 changes: 2 additions & 1 deletion src/blurb/_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

from blurb._blurb_file import Blurbs
from blurb._cli import require_ok, subcommand
from blurb._utils.text import textwrap_body
from blurb._versions import glob_versions, printable_version
from blurb.blurb import glob_blurbs, textwrap_body
from blurb.blurb import glob_blurbs

# Working directory captured once, at the moment this module is imported.
original_dir: str = os.getcwd()

Expand Down
Empty file added src/blurb/_utils/__init__.py
Empty file.
99 changes: 99 additions & 0 deletions src/blurb/_utils/text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from __future__ import annotations

import itertools
import textwrap

TYPE_CHECKING = False
if TYPE_CHECKING:
from collections.abc import Iterable


def textwrap_body(body: str | Iterable[str], *, subsequent_indent: str = '') -> str:
"""Wrap body text.

Accepts either a string or an iterable of strings.
(Iterable is assumed to be individual lines.)
Returns a string.
"""
if isinstance(body, str):
text = body
else:
text = '\n'.join(body).rstrip()

# textwrap merges paragraphs, ARGH

# step 1: remove trailing whitespace from individual lines
# (this means that empty lines will just have \n, no invisible whitespace)
lines = []
for line in text.split('\n'):
lines.append(line.rstrip())
text = '\n'.join(lines)
# step 2: break into paragraphs and wrap those
paragraphs = text.split('\n\n')
paragraphs2 = []
kwargs: dict[str, object] = {'break_long_words': False, 'break_on_hyphens': False}
if subsequent_indent:
kwargs['subsequent_indent'] = subsequent_indent
dont_reflow = False
for paragraph in paragraphs:
# don't reflow bulleted / numbered lists
dont_reflow = dont_reflow or paragraph.startswith(('* ', '1. ', '#. '))
if dont_reflow:
initial = kwargs.get('initial_indent', '')
subsequent = kwargs.get('subsequent_indent', '')
if initial or subsequent:
lines = [line.rstrip() for line in paragraph.split('\n')]
indents = itertools.chain(
itertools.repeat(initial, 1),
itertools.repeat(subsequent),
)
lines = [indent + line for indent, line in zip(indents, lines)]
paragraph = '\n'.join(lines)
paragraphs2.append(paragraph)
else:
# Why do we reflow the text twice? Because it can actually change
# between the first and second reflows, and we want the text to
# be stable. The problem is that textwrap.wrap is deliberately
# dumb about how many spaces follow a period in prose.
#
# We're reflowing at 76 columns, but let's pretend it's 30 for
# illustration purposes. If we give textwrap.wrap the following
# text--ignore the line of 30 dashes, that's just to help you
# with visualization:
#
# ------------------------------
# xxxx xxxx xxxx xxxx xxxx. xxxx
#
# The first textwrap.wrap will return this:
# 'xxxx xxxx xxxx xxxx xxxx.\nxxxx'
#
# If we reflow it again, textwrap will rejoin the lines, but
# only with one space after the period! So this time it'll
# all fit on one line, behold:
# ------------------------------
# xxxx xxxx xxxx xxxx xxxx. xxxx
# and so it now returns:
# 'xxxx xxxx xxxx xxxx xxxx. xxxx'
#
# textwrap.wrap supports trying to add two spaces after a peroid:
# https://docs.python.org/3/library/textwrap.html#textwrap.TextWrapper.fix_sentence_endings
# But it doesn't work all that well, because it's not smart enough
# to do a really good job.
#
# Since blurbs are eventually turned into reST and rendered anyway,
# and since the Zen says 'In the face of ambiguity, refuse the
# temptation to guess', I don't sweat it. I run textwrap.wrap
# twice, so it's stable, and this means occasionally it'll
# convert two spaces to one space, no big deal.

paragraph = '\n'.join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip()
paragraph = '\n'.join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip()
paragraphs2.append(paragraph)
# don't reflow literal code blocks (I hope)
dont_reflow = paragraph.endswith('::')
if subsequent_indent:
kwargs['initial_indent'] = subsequent_indent
text = '\n\n'.join(paragraphs2).rstrip()
if not text.endswith('\n'):
text += '\n'
return text
93 changes: 1 addition & 92 deletions src/blurb/blurb.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,106 +42,15 @@
import base64
import glob
import hashlib
import itertools
import os
import sys
import textwrap
import time

from blurb._template import (
next_filename_unsanitize_sections, sanitize_section,
sanitize_section_legacy, sections, unsanitize_section,
sanitize_section_legacy, sections,
)

def textwrap_body(body, *, subsequent_indent='', width=76):
    """
    Wrap body text, one paragraph at a time.

    Accepts either a string or an iterable of strings.
    (Iterable is assumed to be individual lines.)

    Paragraphs are separated by blank lines.  Ordinary paragraphs are
    reflowed to "width" columns without breaking long words and without
    splitting on hyphens.  A paragraph that starts with "* ", "1. " or
    "#. ", or that directly follows a paragraph ending in "::", is kept
    as written instead of being reflowed.

    If "subsequent_indent" is non-empty, it is prepended to every wrapped
    or preserved line except the first line of the first paragraph.

    Returns a string that always ends with exactly one newline.
    """
    if isinstance(body, str):
        text = body
    else:
        text = "\n".join(body).rstrip()

    # textwrap merges paragraphs, so each paragraph is wrapped on its own.

    # step 1: remove trailing whitespace from individual lines
    #   (this means that empty lines will just have \n, no invisible
    #   whitespace, so the "\n\n" split below sees every paragraph break)
    text = "\n".join(line.rstrip() for line in text.split("\n"))

    # step 2: break into paragraphs and wrap those
    indent = subsequent_indent or ""
    # A single wrapper serves every paragraph; only initial_indent is
    # changed, once the first paragraph has been emitted.
    wrapper = textwrap.TextWrapper(
        width=width,
        subsequent_indent=indent,
        break_long_words=False,
        break_on_hyphens=False,
    )

    paragraphs2 = []
    dont_reflow = False
    for paragraph in text.split("\n\n"):
        # don't reflow bulleted / numbered lists, nor the paragraph right
        # after a "::" marker (dont_reflow is carried over from last time)
        if dont_reflow or paragraph.startswith(("* ", "1. ", "#. ")):
            if indent:
                # Keep the line breaks, but indent the lines the way a
                # reflowed paragraph would have been indented.
                lines = [line.rstrip() for line in paragraph.split("\n")]
                lines = [wrapper.initial_indent + lines[0]] + [
                    indent + line for line in lines[1:]
                ]
                paragraph = "\n".join(lines)
        else:
            # Why do we reflow the text twice?  Because it can actually
            # change between the first and second reflows, and we want the
            # text to be stable.  textwrap is deliberately dumb about how
            # many spaces follow a period in prose.
            #
            # Pretend the width is 30.  Given
            #     "xxxx xxxx xxxx xxxx xxxx.  xxxx"
            # the first pass returns
            #     "xxxx xxxx xxxx xxxx xxxx.\nxxxx"
            # Reflowing that rejoins the lines with only ONE space after
            # the period, so now it all fits on one line:
            #     "xxxx xxxx xxxx xxxx xxxx. xxxx"
            #
            # textwrap can try to add two spaces after a period:
            # https://docs.python.org/3/library/textwrap.html#textwrap.TextWrapper.fix_sentence_endings
            # But it doesn't work all that well, because it's not smart
            # enough to do a really good job.
            #
            # Since blurbs are eventually turned into ReST and rendered
            # anyway, and since the Zen says "In the face of ambiguity,
            # refuse the temptation to guess", we just wrap twice so the
            # result is stable; occasionally two spaces become one.
            for _ in range(2):
                paragraph = "\n".join(wrapper.wrap(paragraph.strip())).rstrip()
        paragraphs2.append(paragraph)
        # don't reflow literal code blocks (I hope)
        dont_reflow = paragraph.endswith("::")
        # From the second paragraph on, first lines are indented as well.
        wrapper.initial_indent = indent

    # The text was just right-stripped, so it cannot already end with a
    # newline: add one unconditionally.
    return "\n\n".join(paragraphs2).rstrip() + "\n"

def sortable_datetime():
    """Return the current local time formatted as YYYY-MM-DD-HH-MM-SS.

    Every field is zero-padded and ordered from year down to second, so
    the strings sort lexicographically in chronological order.
    """
    now = time.localtime()
    return time.strftime("%Y-%m-%d-%H-%M-%S", now)

Expand Down
43 changes: 0 additions & 43 deletions tests/test_blurb.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,49 +4,6 @@
from blurb import blurb


@pytest.mark.parametrize(
    ("body", "subsequent_indent", "expected"),
    [
        # A single long string is reflowed onto two lines.
        pytest.param(
            "This is a test of the textwrap_body function with a string. It should wrap the text to 79 characters.",
            "",
            "This is a test of the textwrap_body function with a string. It should wrap\n"
            "the text to 79 characters.\n",
        ),
        # An iterable of lines is joined first, then reflowed.
        pytest.param(
            [
                "This is a test of the textwrap_body function",
                "with an iterable of strings.",
                "It should wrap the text to 79 characters.",
            ],
            "",
            "This is a test of the textwrap_body function with an iterable of strings. It\n"
            "should wrap the text to 79 characters.\n",
        ),
        # Continuation lines receive the subsequent indent.
        pytest.param(
            "This is a test of the textwrap_body function with a string and subsequent indent.",
            " ",
            "This is a test of the textwrap_body function with a string and subsequent\n"
            " indent.\n",
        ),
        # Bullet lists keep their line breaks and are only indented.
        pytest.param(
            "This is a test of the textwrap_body function with a bullet list and subsequent indent. The list should not be wrapped.\n"
            "\n"
            "* Item 1\n"
            "* Item 2\n",
            " ",
            "This is a test of the textwrap_body function with a bullet list and\n"
            " subsequent indent. The list should not be wrapped.\n"
            "\n"
            " * Item 1\n"
            " * Item 2\n",
        ),
    ],
)
def test_textwrap_body(body, subsequent_indent, expected):
    """Check blurb.textwrap_body against hand-written expected output."""
    wrapped = blurb.textwrap_body(body, subsequent_indent=subsequent_indent)
    assert wrapped == expected


@time_machine.travel("2025-01-07 16:28:41")
def test_sortable_datetime():
    """With the clock pinned by time_machine, the stamp is fully determined."""
    stamp = blurb.sortable_datetime()
    assert stamp == "2025-01-07-16-28-41"
Expand Down
45 changes: 45 additions & 0 deletions tests/test_utils_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import pytest
from blurb._utils.text import textwrap_body


@pytest.mark.parametrize(
    ("body", "subsequent_indent", "expected"),
    [
        # A single long string is reflowed onto two lines.
        pytest.param(
            "This is a test of the textwrap_body function with a string. It should wrap the text to 79 characters.",
            "",
            "This is a test of the textwrap_body function with a string. It should wrap\n"
            "the text to 79 characters.\n",
        ),
        # An iterable of lines is joined first, then reflowed.
        pytest.param(
            [
                "This is a test of the textwrap_body function",
                "with an iterable of strings.",
                "It should wrap the text to 79 characters.",
            ],
            "",
            "This is a test of the textwrap_body function with an iterable of strings. It\n"
            "should wrap the text to 79 characters.\n",
        ),
        # Continuation lines receive the subsequent indent.
        pytest.param(
            "This is a test of the textwrap_body function with a string and subsequent indent.",
            " ",
            "This is a test of the textwrap_body function with a string and subsequent\n"
            " indent.\n",
        ),
        # Bullet lists keep their line breaks and are only indented.
        pytest.param(
            "This is a test of the textwrap_body function with a bullet list and subsequent indent. The list should not be wrapped.\n"
            "\n"
            "* Item 1\n"
            "* Item 2\n",
            " ",
            "This is a test of the textwrap_body function with a bullet list and\n"
            " subsequent indent. The list should not be wrapped.\n"
            "\n"
            " * Item 1\n"
            " * Item 2\n",
        ),
    ],
)
def test_textwrap_body(body, subsequent_indent, expected):
    """Check textwrap_body against hand-written expected output."""
    wrapped = textwrap_body(body, subsequent_indent=subsequent_indent)
    assert wrapped == expected