class MessageIDList(TokenList):
    """Token list produced by parse_message_ids().

    Holds one child token per parsed msg-id; unparsable trailing input is
    stored as a single InvalidMessageID child.
    """

    token_type = 'message-id-list'

    @property
    def message_ids(self):
        """Return the child tokens whose token_type is 'msg-id'.

        Children of any other type (for example the InvalidMessageID that
        wraps unparsable input) are left out.
        """
        return [x for x in self if x.token_type == 'msg-id']


def parse_message_ids(value):
    """in-reply-to     =   "In-Reply-To:" 1*msg-id CRLF
       references      =   "References:" 1*msg-id CRLF

    Parse value into a MessageIDList and return it.  Nothing is raised for
    malformed input; problems are recorded as InvalidHeaderDefect entries
    on the returned list instead:

      * a comma between msg-ids is skipped and recorded as a defect;
      * input that get_msg_id() rejects is wrapped, together with
        everything after it, in one InvalidMessageID token and parsing
        stops there;
      * a value that yields no token at all (empty, or commas only) is
        recorded as a defect because the grammar requires at least one
        msg-id.
    """
    message_ids = MessageIDList()
    while value:
        # message id list separated with commas - this is invalid,
        # but happens rather frequently in the wild
        if value[0] == ',':
            message_ids.defects.append(
                errors.InvalidHeaderDefect("msg-id separated with comma"))
            value = value[1:]
            continue

        try:
            token, value = get_msg_id(value)
        except errors.HeaderParseError as ex:
            # value is untouched when get_msg_id() raises, so the whole
            # remainder is preserved verbatim inside the invalid token.
            token = get_unstructured(value)
            message_ids.append(InvalidMessageID(token))
            message_ids.defects.append(
                errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex)))
            break
        message_ids.append(token)

    if not message_ids:
        # 1*msg-id: at least one msg-id is mandatory.  Whitespace-only
        # input already ends up in the except branch above; this covers
        # the inputs that never reach get_msg_id() at all.
        message_ids.defects.append(
            errors.InvalidHeaderDefect("Missing msg-id"))

    return message_ids
kwds['decoded'] = ' '.join((str(i) for i in parse_tree.message_ids)) + kwds['defects'].extend(parse_tree.all_defects) + + # The header factory # _default_header_map = { @@ -557,6 +569,8 @@ def parse(cls, value, kwds): 'content-disposition': ContentDispositionHeader, 'content-transfer-encoding': ContentTransferEncodingHeader, 'message-id': MessageIDHeader, + 'in-reply-to': ReferencesHeader, + 'references': ReferencesHeader, } class HeaderRegistry: diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 179e236ecdfd7f..6d649b0dab74b5 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2867,6 +2867,69 @@ def test_get_msg_id_ws_only_local(self): ) self.assertEqual(msg_id.token_type, 'msg-id') + def test_parse_message_ids_valid(self): + message_ids = self._test_parse_x( + parser.parse_message_ids, + " ", + " ", + " ", + [], + ) + self.assertEqual(message_ids.token_type, 'message-id-list') + + def test_parse_message_ids_empty(self): + message_ids = self._test_parse_x( + parser.parse_message_ids, + " ", + " ", + " ", + [errors.InvalidHeaderDefect], + ) + self.assertEqual(message_ids.token_type, 'message-id-list') + + def test_parse_message_ids_comment(self): + message_ids = self._test_parse_x( + parser.parse_message_ids, + " (foo's message from \"bar\")", + " (foo's message from \"bar\")", + " ", + [], + ) + self.assertEqual(message_ids.message_ids[0].value, ' ') + self.assertEqual(message_ids.token_type, 'message-id-list') + + def test_parse_message_ids_comma_sep(self): + message_ids = self._test_parse_x( + parser.parse_message_ids, + ",", + "", + "", + [errors.InvalidHeaderDefect], + ) + self.assertEqual(message_ids.message_ids[0].value, '') + self.assertEqual(message_ids.message_ids[1].value, '') + self.assertEqual(message_ids.token_type, 'message-id-list') + + def test_parse_message_ids_invalid_id(self): + message_ids = 
self._test_parse_x( + parser.parse_message_ids, + "", + "", + "", + [errors.InvalidHeaderDefect]*2, + ) + self.assertEqual(message_ids.token_type, 'message-id-list') + + def test_parse_message_ids_broken_ang(self): + message_ids = self._test_parse_x( + parser.parse_message_ids, + " >bar@foo", + " >bar@foo", + " >bar@foo", + [errors.InvalidHeaderDefect]*1, + ) + self.assertEqual(message_ids.token_type, 'message-id-list') + @parameterize diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index ff7a6da644d572..df34ec70504bc5 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -1812,5 +1812,18 @@ def test_message_id_header_is_not_folded(self): h.fold(policy=policy.default.clone(max_line_length=20)), 'Message-ID:\n <ईमेलfromMessage@wők.com>\n') + def test_fold_references(self): + h = self.make_header( + 'References', + ' ' + '' + ) + self.assertEqual( + h.fold(policy=policy.default.clone(max_line_length=20)), + 'References: ' + '\n' + ' \n') + + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2025-07-29-11-37-22.gh-issue-79986.fnJbE_.rst b/Misc/NEWS.d/next/Library/2025-07-29-11-37-22.gh-issue-79986.fnJbE_.rst new file mode 100644 index 00000000000000..57e14e9cdcaae3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-29-11-37-22.gh-issue-79986.fnJbE_.rst @@ -0,0 +1,2 @@ +Add parsing for References/In-Reply-To email headers, preventing them from +being folded.