From ebde4e897750963b82bcae785949ac11b3877640 Mon Sep 17 00:00:00 2001 From: "R. David Murray" Date: Mon, 19 May 2025 15:03:07 -0400 Subject: [PATCH] Fix additional refolding-encoding edge case. In this case, the higher level syntactic unit fit on the remainder of the line in un-encoded format, so the encoding check never happened. To fix this, we look inside the current unit to see if it has anything that was originally encoded. --- Lib/email/_header_value_parser.py | 13 ++++++++++++- Lib/test/test_email/test__header_value_parser.py | 5 +++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 9a51b9437333db..67b885115a3717 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -162,6 +162,14 @@ def comments(self): comments.extend(token.comments) return comments + def has_token_type(self, *token_types): + if self.token_type in token_types: + return True + for t in self: + if t.has_token_type(*token_types): + return True + return False + def fold(self, *, policy): return _refold_parse_tree(self, policy=policy) @@ -922,6 +930,9 @@ def pop_trailing_ws(self): def comments(self): return [] + def has_token_type(self, *token_types): + return self.token_type in token_types + def __getnewargs__(self): return(str(self), self.token_type) @@ -2813,7 +2824,7 @@ def _refold_parse_tree(parse_tree, *, policy): continue tstr = str(part) if not want_encoding: - if part.token_type in ('ptext', 'vtext'): + if part.token_type == 'ptext' or part.has_token_type('vtext'): # Encode if tstr contains special characters. 
want_encoding = not SPECIALSNL.isdisjoint(tstr) else: diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index ac12c3b2306f7d..a2e77d2e8176eb 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -3096,6 +3096,11 @@ def test_address_list_with_specials_in_encoded_word(self): 'A =?utf-8?q?v=C3=A9ry?= long name\n' ' containing =?utf-8?q?a=2C?= comma\n' ' \n'), + ('=?utf-8?Q?a=2C=20123456789012345678901234567890123456?=' + ' ', + '=?utf-8?q?a=2C?=\n' + ' 123456789012345678901234567890123456\n' + ' \n'), ] for (to, folded) in cases: with self.subTest(to=to):