From 3bc8212ad08e59ed5350fa25f1a0f5b648b0c49c Mon Sep 17 00:00:00 2001
From: Gustaf <79180496+GGyll@users.noreply.github.com>
Date: Tue, 20 May 2025 20:54:38 +0200
Subject: [PATCH 1/5] email: Correctly decode using policy UTF-8 attribute in
 BytesParser

---
 Lib/email/parser.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Lib/email/parser.py b/Lib/email/parser.py
index 039f03cba74fa0..3231e16f6de40e 100644
--- a/Lib/email/parser.py
+++ b/Lib/email/parser.py
@@ -100,7 +100,8 @@ def parse(self, fp, headersonly=False):
         parsing after reading the headers or not.  The default is False,
         meaning it parses the entire contents of the file.
         """
-        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
+        encoding = "utf-8" if getattr(self.parser.policy, "utf8", False) else "ascii"
+        fp = TextIOWrapper(fp, encoding=encoding, errors='surrogateescape')
         try:
             return self.parser.parse(fp, headersonly)
         finally:

From 29f3802d202aa3a2d2b7c8243de554413657a20e Mon Sep 17 00:00:00 2001
From: Gustaf <79180496+GGyll@users.noreply.github.com>
Date: Tue, 20 May 2025 20:57:08 +0200
Subject: [PATCH 2/5] Added test for message_from_binary_file using utf-8

---
 Lib/test/test_email/test_email.py | 32 +++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 7b14305f997e5d..17ea125c3c55db 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -3989,6 +3989,38 @@ def test_bytes_parser_on_exception_does_not_close_file(self):
                               bytesParser(policy=email.policy.strict).parse,
                               fp)
             self.assertFalse(fp.closed)
+            
+    def test_bytes_parser_uses_policy_utf8_setting(self):
+        m = """
+            From: Nathaniel Nameson <nathan@nameson.com>
+            To: Ned Sampleson <ned@sampleson.com>
+            Subject: Sample message
+            MIME-Version: 1.0
+            Content-type: multipart/mixed; boundary="i-am-boundary"
+
+            This is the préamble.  It is to be ignored, though it
+            is a handy place for mail composers to include an
+            explanatory note to non-MIME compliant readers.
+
+            --i-am-boundary
+            Content-type: text/plain; charset=us-ascii
+
+            This is explicitly typed plain ASCII text.
+            It DOES end with a linebreak.
+
+            --i-am-boundary
+            Content-type: text/plain; charset=utf-8
+            Content-Transfer-Encoding: 8bit
+
+            This should be correctly encapsulated: Un petit café ?
+
+            --i-am-boundary--
+            This is the epilogue.  It is also to be ignored.
+
+            """.lstrip()
+        M_BYTES = BytesIO(m.encode())
+        msg = email.message_from_binary_file(M_BYTES, policy=email.policy.default.clone(utf8=True))
+        self.assertEqual(msg.as_string(), m)
 
     def test_parser_does_not_close_file(self):
         with openfile('msg_02.txt', encoding="utf-8") as fp:

From 64d2bd39eaa7dea5e2187ba07c568c7508ed532a Mon Sep 17 00:00:00 2001
From: Gustaf <79180496+GGyll@users.noreply.github.com>
Date: Tue, 20 May 2025 21:32:25 +0200
Subject: [PATCH 3/5] Update test_bytes_parser_uses_policy_utf8_setting test

Now asserts that the UTF-8 encoded data is present in the final message
---
 Lib/test/test_email/test_email.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 17ea125c3c55db..c98a53a3be43b8 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -4019,8 +4019,14 @@ def test_bytes_parser_uses_policy_utf8_setting(self):
 
             """.lstrip()
         M_BYTES = BytesIO(m.encode())
+
         msg = email.message_from_binary_file(M_BYTES, policy=email.policy.default.clone(utf8=True))
-        self.assertEqual(msg.as_string(), m)
+        for i, part in enumerate(msg.iter_parts(), 1):
+            _ = part.as_string()
+
+        msg_string = msg.as_string()
+        self.assertIn("This is the préamble.", msg_string)
+        self.assertIn("Un petit café", msg_string)
 
     def test_parser_does_not_close_file(self):
         with openfile('msg_02.txt', encoding="utf-8") as fp:

From 4bbea35ab353a46af84f6e204e31fe0115ce9b10 Mon Sep 17 00:00:00 2001
From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com>
Date: Tue, 20 May 2025 19:37:10 +0000
Subject: [PATCH 4/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?=
 =?UTF-8?q?rb=5Fit.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../next/Library/2025-05-20-19-37-09.gh-issue-118718.dyhtAS.rst | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Library/2025-05-20-19-37-09.gh-issue-118718.dyhtAS.rst

diff --git a/Misc/NEWS.d/next/Library/2025-05-20-19-37-09.gh-issue-118718.dyhtAS.rst b/Misc/NEWS.d/next/Library/2025-05-20-19-37-09.gh-issue-118718.dyhtAS.rst
new file mode 100644
index 00000000000000..3ce8eda70c7687
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-05-20-19-37-09.gh-issue-118718.dyhtAS.rst
@@ -0,0 +1,2 @@
+Fix :class:`email.parser.BytesParser` incorrectly decoding non-ASCII text, such as
+the preamble, when the policy has ``utf8=True``. Contributed by Gustaf Gyllensporre.

From bfcef667a9196c88bc5c210a8fee03d7d3b418bd Mon Sep 17 00:00:00 2001
From: Gustaf <79180496+GGyll@users.noreply.github.com>
Date: Wed, 21 May 2025 18:00:08 +0200
Subject: [PATCH 5/5] removed extra whitespace in test_email.py

---
 Lib/test/test_email/test_email.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index c98a53a3be43b8..f546b24c7cf8db 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -3989,7 +3989,7 @@ def test_bytes_parser_on_exception_does_not_close_file(self):
                               bytesParser(policy=email.policy.strict).parse,
                               fp)
             self.assertFalse(fp.closed)
-            
+
     def test_bytes_parser_uses_policy_utf8_setting(self):
         m = """
             From: Nathaniel Nameson <nathan@nameson.com>