From ad85b834dda671a8ff7942258cece9275bb0817b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 16 May 2024 17:34:30 +0300 Subject: [PATCH 1/2] gh-118643: Fix AttributeError in the email module Fix regression introduced in gh-100884: AttributeError when re-fold a long address list. Also fix more cases of incorrect encoding of the address separator in the address list missed in gh-100884. --- Lib/email/_header_value_parser.py | 15 ++++++++++++--- Lib/test/test_email/test__header_value_parser.py | 12 ++++++++++-- ...2024-05-16-17-31-46.gh-issue-118643.hAWH4C.rst | 2 ++ 3 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-16-17-31-46.gh-issue-118643.hAWH4C.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index d1b4c7df4f445f..fccf1e40064f4e 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -956,6 +956,7 @@ class _InvalidEwError(errors.HeaderParseError): DOT = ValueTerminal('.', 'dot') ListSeparator = ValueTerminal(',', 'list-separator') ListSeparator.as_ew_allowed = False +ListSeparator.syntactic_break = False RouteComponentMarker = ValueTerminal('@', 'route-component-marker') # @@ -2838,7 +2839,9 @@ def _refold_parse_tree(parse_tree, *, policy): if not hasattr(part, 'encode'): # It's not a Terminal, do each piece individually. parts = list(part) + parts - else: + want_encoding = False + continue + elif part.as_ew_allowed: # It's a terminal, wrap it as an encoded word, possibly # combining it with previously encoded words if allowed. if (last_ew is not None and @@ -2849,8 +2852,14 @@ def _refold_parse_tree(parse_tree, *, policy): last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew, part.ew_combine_allowed, charset) last_charset = charset - want_encoding = False - continue + want_encoding = False + continue + else: + # It's a terminal which should be kept non-encoded + # (e.g. a ListSeparator). + last_ew = None + want_encoding = False + pass if len(tstr) <= maxlen - len(lines[-1]): lines[-1] += tstr continue diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 56a1e3a3de5aa2..5413319a414a62 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -3077,9 +3077,17 @@ def test_address_list_with_unicode_names_in_quotes(self): ' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= \n') def test_address_list_with_list_separator_after_fold(self): - to = '0123456789' * 8 + '@foo, ä ' + a = 'x' * 66 + '@example.com' + to = f'{a}, "Hübsch Kaktus" ' self._test(parser.get_address_list(to)[0], - '0123456789' * 8 + '@foo,\n =?utf-8?q?=C3=A4?= \n') + f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus \n') + + a = '.' * 79 + to = f'"{a}" , "Hübsch Kaktus" ' + self._test(parser.get_address_list(to)[0], + f'{a}\n' + ' , =?utf-8?q?H=C3=BCbsch?= Kaktus ' + '\n') # XXX Need tests with comments on various sides of a unicode token, # and with unicode tokens in the comments. Spaces inside the quotes diff --git a/Misc/NEWS.d/next/Library/2024-05-16-17-31-46.gh-issue-118643.hAWH4C.rst b/Misc/NEWS.d/next/Library/2024-05-16-17-31-46.gh-issue-118643.hAWH4C.rst new file mode 100644 index 00000000000000..e86a49af74c9d6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-16-17-31-46.gh-issue-118643.hAWH4C.rst @@ -0,0 +1,2 @@ +Fix an AttributeError in the :mod:`email` module when re-fold a long address +list. Also fix more cases of incorrect encoding of the address separator in the address list. From 62125d773402b5196e638e3b415829a89b16e502 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 17 May 2024 18:29:14 +0300 Subject: [PATCH 2/2] Update Lib/email/_header_value_parser.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Marta Gómez Macías --- Lib/email/_header_value_parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index fccf1e40064f4e..55690deac018e5 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2859,7 +2859,6 @@ def _refold_parse_tree(parse_tree, *, policy): # (e.g. a ListSeparator). last_ew = None want_encoding = False - pass if len(tstr) <= maxlen - len(lines[-1]): lines[-1] += tstr continue