python · f18a14c09s · Jul 17, 2021 · Jul 17, 2021 · Jul 17, 2021 · Jul 18, 2021
@@ -35,6 +35,8 @@
 
 
 from base64 import b64encode
+from typing import ByteString, Callable
+
 from binascii import b2a_base64, a2b_base64
 
 CRLF = '\r\n'
@@ -114,6 +116,65 @@ def decode(string):
         return a2b_base64(string)
 
 
+class Base64FeedDecoder:
+    """
+    Adaptation of RFC 2045, s. 6.8 that performs incremental decoding for
+     FeedParser API.
+
+    Encoded input may be fed in chunks of arbitrary size.  Whitespace is
+     discarded, complete four-character base-64 blocks are decoded and handed
+     to the feed callback as soon as they are available, and an incomplete
+     trailing block is buffered until more input arrives.
+
+    Note that there is no parsing-related functionality in this class.
+     Therefore, this class could be generalized, by making the _feed variable
+     optional, a new _decode_buffer variable that is returned by close(),
+     and _decode a constructor kwarg, for example; and refactored/moved to the
+     top-level, base64 package.
+    """
+
+    # ASCII whitespace removed from the input before blocks are counted.  The
+    # underlying decoder discards these octets on its own, so leaving any of
+    # them in the buffer would shift the four-character block boundaries.
+    _WHITESPACE = b' \t\n\r\x0b\x0c'
+
+    def __init__(self, feed: Callable[[ByteString], None]):
+        """
+        :param feed: function that consumes each chunk of decoded data.
+        """
+        self._decode = a2b_base64  # Underlying decoder implementation.
+        self._feed = feed  # Consumes the decoded data.
+        # This buffers an incomplete base-64 block that can't be decoded or
+        # parsed yet:
+        self._encoded_buffer = bytearray()
+
+    def feed(self, data: ByteString):
+        """
+        Feed the decoder some more base-64-encoded data. data should be a
+         bytes-like object holding encoded characters.  A chunk may end in the
+         middle of a four-character block; the decoder stitches such partial
+         blocks together with the data passed to later calls.
+        :param data: bytes-like object of arbitrary-length.
+        :raises: whatever the underlying decoder raises for undecodable
+         input.  The offending blocks have already been removed from the
+         buffer when that happens.
+        """
+        # Remove whitespace to ensure accurate length calculation:
+        self._encoded_buffer += bytes(data).translate(None, self._WHITESPACE)
+        # Only whole four-character blocks can be decoded; the remainder
+        # stays buffered for the next call:
+        decodable_length = len(self._encoded_buffer) // 4 * 4
+        if not decodable_length:
+            return
+        decodable_bytes = bytes(self._encoded_buffer[:decodable_length])
+        del self._encoded_buffer[:decodable_length]
+        # If _feed were made optional, then the decoded bytes could be
+        # appended to a new self._decoded_buffer variable when _feed is
+        # None:
+        self._feed(self._decode(decodable_bytes))
+
+    def close(self):
+        """
+        Validate the length of all previously fed data.  Every complete block
+         has already been decoded by feed(), so nothing is decoded here.  It is
+         undefined what happens if feed() is called after this method has been
+         called.
+        :raises: ValueError if an incomplete block is still buffered, i.e. the
+         input fails length validation.
+        """
+        if self._encoded_buffer:
+            raise ValueError('The base-64 input has invalid length.')
+        # If _feed were made optional, then a new self._decoded_buffer variable
+        # could be returned when _feed is None.
+
+
 # For convenience and backwards compatibility w/ standard base64 module
 body_decode = decode
 decodestring = decode
@@ -21,12 +21,16 @@
 
 __all__ = ['FeedParser', 'BytesFeedParser']
 
+import abc
+import base64
+import quopri
 import re
 
 from email import errors
 from email._policybase import compat32
 from collections import deque
 from io import StringIO
+from email.base64mime import Base64FeedDecoder
 
 NLCRE = re.compile(r'\r\n|\r|\n')
 NLCRE_bol = re.compile(r'(\r\n|\r|\n)')
@@ -292,6 +296,34 @@ def _parsegen(self):
                 # Not at EOF so this is a line we're going to need.
                 self._input.unreadline(line)
             return
+        if self._cur.get_content_type() == 'message/global':
+            # Support for message/global parts that can have non-identity
+            # content-transfer-encodings as outlined in RFC 6532
+            # (s. 1, p. 3; s 3.5; "Encoding considerations," s. 3.7).
+            decoding_parser_factory = _decoding_parser_factory_map.get(
+                self._cur['Content-Transfer-Encoding']
+            )
+            if decoding_parser_factory is not None:
+                # This block only executes if the subpart needs to be decoded as
+                # it's parsed.  Unspecified and identity
+                # Content-Transfer-Encodings are implicitly handled in a
+                # subsequent block.
+                decoding_parser = decoding_parser_factory(
+                    policy=self.policy,
+                    _factory=self._factory
+                )
+                # Decode current part's body and parse as another part:
+                for line in self._input:
+                    if line is NeedMoreData:
+                        yield NeedMoreData
+                        continue
+                    if line == '':
+                        break
+                    decoding_parser.feed(line)
+                # Retrieve new part and attach (i.e. make a subpart):
+                subpart = decoding_parser.close()
+                self._cur.attach(subpart)
+                return
         if self._cur.get_content_maintype() == 'message':
             # The message claims to be a message/* type, then what follows is
             # another RFC 2822 message.
@@ -534,3 +566,108 @@ class BytesFeedParser(FeedParser):
 
     def feed(self, data):
         super().feed(data.decode('ascii', 'surrogateescape'))
+
+
+class EncodedFeedParser(abc.ABC):
+    """
+    Abstract base for feed parsers that undo a Content-Transfer-Encoding; only
+     concrete subclasses should be instantiated.
+
+    An instance is used like a FeedParser.  The difference is that each
+     subclass's feed() first decodes its input, consistent with RFC 2045,
+     s. 6.2, and only then hands it to the wrapped BytesFeedParser.  Every
+     subclass reverses one of the non-identity Content-Transfer-Encoding
+     transformations described there.
+    """
+
+    def __init__(self, *args, **kwargs):
+        # All constructor arguments go to the wrapped parser unchanged.
+        wrapped_parser = BytesFeedParser(*args, **kwargs)
+        self._bytes_feed_parser = wrapped_parser
+
+    @abc.abstractmethod
+    def feed(self, text):
+        """Decode text and pass the result on; subclasses must override."""
+
+    def close(self):
+        """Close the wrapped parser and return what its close() returns."""
+        wrapped_parser = self._bytes_feed_parser
+        return wrapped_parser.close()
+
+
+class Base64EncodedFeedParser(EncodedFeedParser):
+    """
+    FeedParser that supports base64-encoded message parts (i.e. the combination
+     of RFC 2045, s. 6.8; and RFC 6532, particularly s. 3.5).
+
+    Exceptions raised while encoding, decoding or forwarding the fed text are
+     not propagated by feed().  They are collected and handed to the wrapped
+     parser's policy, via handle_defect(), when close() is called.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Incremental decoder that forwards every decoded chunk straight to
+        # the wrapped bytes parser:
+        self._decoder = Base64FeedDecoder(self._bytes_feed_parser.feed)
+        # Exceptions collected by feed() and close(), in order of occurrence:
+        self._errors = []
+
+    def feed(self, text):
+        """
+        Decode some more base64 text and pass the result to the wrapped parser.
+        :param text: str holding base64-encoded data.
+        """
+        # The ASCII encoding step is inside the try block on purpose: text
+        # that is not pure ASCII is recorded like any other decoding error
+        # instead of escaping to the caller.
+        try:
+            self._decoder.feed(text.encode('ascii'))
+        except Exception as e:
+            self._errors.append(e)
+
+    def close(self):
+        """
+        Finish parsing and report the collected decoding errors.
+        :return: the object returned by the wrapped parser's close().
+        """
+        message_part = self._bytes_feed_parser.close()
+        # Attempt to close the decoder in case any further errors occur:
+        try:
+            self._decoder.close()
+        except Exception as e:
+            self._errors.append(e)
+        # Include the decoding-related errors in the message.  The policy is
+        # taken from the wrapped parser because neither this class nor its
+        # base class sets a policy attribute of its own:
+        policy = self._bytes_feed_parser.policy
+        for error in self._errors:
+            policy.handle_defect(message_part, error)
+        return message_part
+
+
+class QuotedPrintableFeedParser(EncodedFeedParser):
+    """
+    FeedParser that supports quoted-printable message parts (i.e. the
+     combination of RFC 2045, s. 6.7; and RFC 6532, particularly s. 3.5).
+
+    Input may be fed in chunks of arbitrary size.  A chunk that ends within two
+     characters of an equal sign may have cut an escaped octet or a soft line
+     break in half, so everything from that equal sign onwards is buffered
+     until more input arrives or close() is called.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Encoded input that has been fed but not decoded yet:
+        self._encoded_buffer = bytearray()
+
+    def feed(self, text):
+        """
+        Decode some more quoted-printable text and pass the result to the
+         wrapped parser.
+        :param text: str holding quoted-printable-encoded data.
+        """
+        if not text:
+            # The buffer hasn't changed since the last call.
+            return
+        # surrogateescape mirrors the decoding done by BytesFeedParser.feed(),
+        # so raw bytes that were smuggled into the text as surrogates are
+        # restored instead of raising UnicodeEncodeError:
+        buffer = self._encoded_buffer
+        buffer.extend(text.encode('ascii', 'surrogateescape'))
+        decodable_length = len(buffer)
+        index_of_last_equal_sign = buffer.rfind(b'=')
+        if index_of_last_equal_sign >= max(0, len(buffer) - 2):
+            # The buffer doesn't yet have all three chars of its last octet/soft
+            # line break, so only the chars leading up to its last equal sign
+            # can be decoded.
+            decodable_length = index_of_last_equal_sign
+        encoded_bytes = bytes(buffer[:decodable_length])
+        del buffer[:decodable_length]
+        if encoded_bytes:
+            self._bytes_feed_parser.feed(quopri.decodestring(encoded_bytes))
+
+    def close(self):
+        """
+        Decode whatever is still buffered, then finish parsing.
+        :return: the object returned by the wrapped parser's close().
+        """
+        if self._encoded_buffer:
+            # No more input is coming, so the held-back tail is decoded as it
+            # stands rather than being dropped.  How a truncated escape is
+            # rendered is left to quopri.decodestring().
+            # TODO: Add a defect to the message object for a truncated escape.
+            leftover = bytes(self._encoded_buffer)
+            self._encoded_buffer.clear()
+            decoded_bytes = quopri.decodestring(leftover)
+            if decoded_bytes:
+                self._bytes_feed_parser.feed(decoded_bytes)
+        return self._bytes_feed_parser.close()
+
+
+# Content-Transfer-Encoding value -> EncodedFeedParser subclass that reverses
+# it.  Only encodings that actually need decoding are listed; identity
+# transformations (i.e. where no decoding is required) are deliberately absent,
+# so a lookup for one of those finds nothing.
+_decoding_parser_factory_map = {
+    'base64': Base64EncodedFeedParser,
+    'quoted-printable': QuotedPrintableFeedParser,
+}