From 3d056b776cf2e1ff2898831c30aa00f9fad86abd Mon Sep 17 00:00:00 2001 From: Mike Edmunds Date: Sat, 18 Jan 2025 16:50:52 -0800 Subject: [PATCH 1/2] [3.10] gh-80222: Fix email address header folding with long quoted-string (GH-122753) Email generators using email.policy.default could incorrectly omit the quote ('"') characters from a quoted-string during header refolding, leading to invalid address headers and enabling header spoofing. This change restores the quote characters on a bare-quoted-string as the header is refolded, and escapes backslash and quote chars in the string. (cherry picked from commit 5aaf4168583) Co-authored-by: Mike Edmunds --- Lib/email/_header_value_parser.py | 19 ++++++++- .../test_email/test__header_value_parser.py | 40 +++++++++++++++++++ ...4-08-06-11-43-08.gh-issue-80222.wfR4BU.rst | 6 +++ 3 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index e1b99d5b417253..5a467669db72e5 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -95,8 +95,16 @@ NLSET = {'\n', '\r'} SPECIALSNL = SPECIALS | NLSET + +def make_quoted_pairs(value): + """Escape dquote and backslash for use within a quoted-string.""" + return str(value).replace('\\', '\\\\').replace('"', '\\"') + + def quote_string(value): - return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' + escaped = make_quoted_pairs(value) + return f'"{escaped}"' + # Match a RFC 2047 word, looks like =?utf-8?q?someword?= rfc2047_matcher = re.compile(r''' @@ -2848,6 +2856,15 @@ def _refold_parse_tree(parse_tree, *, policy): if not hasattr(part, 'encode'): # It's not a terminal, try folding the subparts. newparts = list(part) + if part.token_type == 'bare-quoted-string': + # To fold a quoted string we need to create a list of terminal + # tokens that will render the leading and trailing quotes + # and use quoted pairs in the value as appropriate. + newparts = ( + [ValueTerminal('"', 'ptext')] + + [ValueTerminal(make_quoted_pairs(p), 'ptext') + for p in newparts] + + [ValueTerminal('"', 'ptext')]) if not part.as_ew_allowed: wrap_as_ew_blocked += 1 newparts.append(end_ew_not_allowed) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 854f2ff009c618..0a9b1f4705b028 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2946,6 +2946,46 @@ def test_address_list_with_unicode_names_in_quotes(self): '=?utf-8?q?H=C3=BCbsch?= Kaktus ,\n' ' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= \n') + def test_address_list_with_list_separator_after_fold(self): + a = 'x' * 66 + '@example.com' + to = f'{a}, "Hübsch Kaktus" ' + self._test(parser.get_address_list(to)[0], + f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus \n') + + a = '.' * 79 # ('.' is a special, so must be in quoted-string.) + to = f'"{a}" , "Hübsch Kaktus" ' + self._test(parser.get_address_list(to)[0], + f'"{a}"\n' + ' , =?utf-8?q?H=C3=BCbsch?= Kaktus ' + '\n') + + def test_address_list_with_specials_in_long_quoted_string(self): + # Regression for gh-80222. + policy = self.policy.clone(max_line_length=40) + cases = [ + # (to, folded) + ('"Exfiltrator (unclosed comment?" ', + '"Exfiltrator (unclosed\n' + ' comment?" \n'), + ('"Escaped \\" chars \\\\ in quoted-string stay escaped" ', + '"Escaped \\" chars \\\\ in quoted-string\n' + ' stay escaped" \n'), + ('This long display name does not need quotes ', + 'This long display name does not need\n' + ' quotes \n'), + ('"Quotes are not required but are retained here" ', + '"Quotes are not required but are\n' + ' retained here" \n'), + ('"A quoted-string, it can be a valid local-part"@example.com', + '"A quoted-string, it can be a valid\n' + ' local-part"@example.com\n'), + ('"local-part-with-specials@but-no-fws.cannot-fold"@example.com', + '"local-part-with-specials@but-no-fws.cannot-fold"@example.com\n'), + ] + for (to, folded) in cases: + with self.subTest(to=to): + self._test(parser.get_address_list(to)[0], folded, policy=policy) + # XXX Need tests with comments on various sides of a unicode token, # and with unicode tokens in the comments. Spaces inside the quotes # currently don't do the right thing. diff --git a/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst new file mode 100644 index 00000000000000..0f0661d0b1cf4a --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst @@ -0,0 +1,6 @@ +Fix bug in the folding of quoted strings when flattening an email message using +a modern email policy. Previously when a quoted string was folded so that +it spanned more than one line, the surrounding quotes and internal escapes +would be omitted. This could theoretically be used to spoof header lines +using a carefully constructed quoted string if the resulting rendered email +was transmitted or re-parsed. From f35f5c09d48db38f65f42b6bddf9a4888d746ba2 Mon Sep 17 00:00:00 2001 From: "R. David Murray" Date: Fri, 14 Mar 2025 12:18:41 -0400 Subject: [PATCH 2/2] Remove test incorrectly included by cherry-pick. --- Lib/test/test_email/test__header_value_parser.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 0a9b1f4705b028..4007be9a55c2e6 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2946,19 +2946,6 @@ def test_address_list_with_unicode_names_in_quotes(self): '=?utf-8?q?H=C3=BCbsch?= Kaktus ,\n' ' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= \n') - def test_address_list_with_list_separator_after_fold(self): - a = 'x' * 66 + '@example.com' - to = f'{a}, "Hübsch Kaktus" ' - self._test(parser.get_address_list(to)[0], - f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus \n') - - a = '.' * 79 # ('.' is a special, so must be in quoted-string.) - to = f'"{a}" , "Hübsch Kaktus" ' - self._test(parser.get_address_list(to)[0], - f'"{a}"\n' - ' , =?utf-8?q?H=C3=BCbsch?= Kaktus ' - '\n') - def test_address_list_with_specials_in_long_quoted_string(self): # Regression for gh-80222. policy = self.policy.clone(max_line_length=40)