python · Xiami2012 · Aug 29, 2018 · Aug 29, 2018 · georgschoelly · Feb 5, 2019
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
@@ -2726,12 +2726,23 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset):
             continue
         first_part = to_encode[:text_space]
         ew = _ew.encode(first_part, charset=encode_as)
-        excess = len(ew) - remaining_space
-        if excess > 0:
-            # encode always chooses the shortest encoding, so this
-            # is guaranteed to fit at this point.
-            first_part = first_part[:-excess]
-            ew = _ew.encode(first_part)
+        if len(ew) > remaining_space:
+            # Find the longest prefix of first_part that still fits.
+            # Since len(_ew.encode(to_encode[:x])) is a non-linear,
+            # monotonically increasing function of x, and calculating the
+            # exact length would require knowing the internals of
+            # _ew.encode, use a binary search here.
+            part_len_l = 0
+            part_len_r = text_space
+            while part_len_l + 1 < part_len_r:
+                part_len_m = (part_len_l + part_len_r) // 2
+                ew = _ew.encode(first_part[:part_len_m], charset=encode_as)
+                if len(ew) <= remaining_space:
+                    part_len_l = part_len_m
+                else:
+                    part_len_r = part_len_m
+            first_part = to_encode[:part_len_l]
+            ew = _ew.encode(first_part, charset=encode_as)
         lines[-1] += ew
         to_encode = to_encode[len(first_part):]
         if to_encode:

diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
@@ -2687,6 +2687,12 @@ def test_unstructured_with_unicode_no_folds(self):
         self._test(parser.get_unstructured("hübsch kleiner beißt"),
                    "=?utf-8?q?h=C3=BCbsch_kleiner_bei=C3=9Ft?=\n")
 
+    def test_unstructured_with_long_unicode_folded(self):
+        self._test(parser.get_unstructured("虾" * 40),
+                   "=?utf-8?b?" + "6Jm+" * 16 + "?=\n"
+                   " =?utf-8?b?" + "6Jm+" * 16 + "?=\n"
+                   " =?utf-8?b?" + "6Jm+" * 8 + "?=\n")
+
     def test_one_ew_on_each_of_two_wrapped_lines(self):
         self._test(parser.get_unstructured("Mein kleiner Kaktus ist sehr "
                                            "hübsch.  Es hat viele Stacheln "

diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
@@ -1643,10 +1643,10 @@ def test_fold_overlong_words_using_RFC2047(self):
         self.assertEqual(
             h.fold(policy=policy.default),
             'X-Report-Abuse: =?utf-8?q?=3Chttps=3A//www=2Emailitapp=2E'
-                'com/report=5F?=\n'
-            ' =?utf-8?q?abuse=2Ephp=3Fmid=3Dxxx-xxx-xxxx'
-                'xxxxxxxxxxxxxxxxxxxx=3D=3D-xxx-?=\n'
-            ' =?utf-8?q?xx-xx=3E?=\n')
+                'com/report=5Fabuse?=\n'
+            ' =?utf-8?q?=2Ephp=3Fmid=3Dxxx-xxx-xxxx'
+                'xxxxxxxxxxxxxxxxxxxx=3D=3D-xxx-xx-xx?=\n'
+            ' =?utf-8?q?=3E?=\n')
 
 
 if __name__ == '__main__':

diff --git a/Misc/ACKS b/Misc/ACKS
@@ -1599,6 +1599,7 @@ Anish Tambe
 Musashi Tamura
 William Tanksley
 Christian Tanzer
+Pengyu Tao
 Steven Taschuk
 Amy Taylor
 Julian Taylor

diff --git a/Misc/NEWS.d/next/Library/2018-08-30-11-11-25.bpo-34222.yA1Rn7.rst b/Misc/NEWS.d/next/Library/2018-08-30-11-11-25.bpo-34222.yA1Rn7.rst
@@ -0,0 +1 @@
+Fix infinite loop when folding non-ASCII email headers.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Fix infinite loop when folding non-ASCII email headers.