From b1f41b8b30ea62c7975fb31cb9377c4e73c24c13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 9 Jan 2023 13:49:39 +0000 Subject: [PATCH 1/2] GH-100884: email/_header_value_parser: use existing ListSeparator There is already a predefined object with the correct properties; use it. --- Lib/email/_header_value_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index e637e6df06612d..b46da73b9ba746 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2022,7 +2022,7 @@ def get_address_list(value): address_list.defects.append(errors.InvalidHeaderDefect( "invalid address in address-list")) if value: # Must be a , at this point. - address_list.append(ValueTerminal(',', 'list-separator')) + address_list.append(ListSeparator) value = value[1:] return address_list, value From 75a89ca883f603582b69e7f061a2f75f8f12125e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 9 Jan 2023 06:05:35 +0000 Subject: [PATCH 2/2] GH-100884: email/_header_value_parser: don't encode list separators ListSeparator should not be encoded. This could happen when a long line pushes its separator to the next line, which would have been encoded. Fixes #100884 --- Lib/email/_header_value_parser.py | 1 + Lib/test/test_email/test__header_value_parser.py | 5 +++++ .../Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst | 2 ++ 3 files changed, 8 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index b46da73b9ba746..8b22ce34049331 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -949,6 +949,7 @@ class _InvalidEwError(errors.HeaderParseError): # up other parse trees. Maybe should have tests for that, too. 
DOT = ValueTerminal('.', 'dot') ListSeparator = ValueTerminal(',', 'list-separator') +ListSeparator.as_ew_allowed = False RouteComponentMarker = ValueTerminal('@', 'route-component-marker') # diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 854f2ff009c618..7063ce7d71c7ab 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2946,6 +2946,11 @@ def test_address_list_with_unicode_names_in_quotes(self): '=?utf-8?q?H=C3=BCbsch?= Kaktus ,\n' ' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= \n') + def test_address_list_with_list_separator_after_fold(self): + to = '0123456789' * 8 + '@foo, ä ' + self._test(parser.get_address_list(to)[0], + '0123456789' * 8 + '@foo,\n =?utf-8?q?=C3=A4?= \n') + # XXX Need tests with comments on various sides of a unicode token, # and with unicode tokens in the comments. Spaces inside the quotes # currently don't do the right thing. diff --git a/Misc/NEWS.d/next/Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst b/Misc/NEWS.d/next/Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst new file mode 100644 index 00000000000000..2a388178810835 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst @@ -0,0 +1,2 @@ +email: fix misfolding of comma in address-lists over multiple lines in +combination with unicode encoding.