diff --git a/Lib/email/parser.py b/Lib/email/parser.py index 039f03cba74fa0..3231e16f6de40e 100644 --- a/Lib/email/parser.py +++ b/Lib/email/parser.py @@ -100,7 +100,8 @@ def parse(self, fp, headersonly=False): parsing after reading the headers or not. The default is False, meaning it parses the entire contents of the file. """ - fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape') + encoding = "utf-8" if getattr(self.parser.policy, "utf8", False) else "ascii" + fp = TextIOWrapper(fp, encoding=encoding, errors='surrogateescape') try: return self.parser.parse(fp, headersonly) finally: diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 7b14305f997e5d..f546b24c7cf8db 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -3990,6 +3990,44 @@ def test_bytes_parser_on_exception_does_not_close_file(self): fp) self.assertFalse(fp.closed) + def test_bytes_parser_uses_policy_utf8_setting(self): + m = """ + From: Nathaniel Nameson + To: Ned Sampleson + Subject: Sample message + MIME-Version: 1.0 + Content-type: multipart/mixed; boundary="i-am-boundary" + + This is the préamble. It is to be ignored, though it + is a handy place for mail composers to include an + explanatory note to non-MIME compliant readers. + + --i-am-boundary + Content-type: text/plain; charset=us-ascii + + This is explicitly typed plain ASCII text. + It DOES end with a linebreak. + + --i-am-boundary + Content-type: text/plain; charset=utf-8 + Content-Transfer-Encoding: 8bit + + This should be correctly encapsulated: Un petit café ? + + --i-am-boundary-- + This is the epilogue. It is also to be ignored. 
+ + """.lstrip() + M_BYTES = BytesIO(m.encode()) + + msg = email.message_from_binary_file(M_BYTES, policy=email.policy.default.clone(utf8=True)) + for i, part in enumerate(msg.iter_parts(), 1): + _ = part.as_string() + + msg_string = msg.as_string() + self.assertIn("This is the préamble.", msg_string) + self.assertIn("Un petit café", msg_string) + def test_parser_does_not_close_file(self): with openfile('msg_02.txt', encoding="utf-8") as fp: email.parser.Parser().parse(fp) diff --git a/Misc/NEWS.d/next/Library/2025-05-20-19-37-09.gh-issue-118718.dyhtAS.rst b/Misc/NEWS.d/next/Library/2025-05-20-19-37-09.gh-issue-118718.dyhtAS.rst new file mode 100644 index 00000000000000..3ce8eda70c7687 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-20-19-37-09.gh-issue-118718.dyhtAS.rst @@ -0,0 +1,2 @@ +Fix :class:`email.parser.BytesParser` decoding its input as ASCII even when the policy sets ``utf8=True``, which garbled non-ASCII text such as the preamble. +Contributed by Gustaf Gyllensporre.