-
-
Notifications
You must be signed in to change notification settings - Fork 32.6k
bpo-35805: Add parser for Message-ID email header. #13397
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -179,37 +179,30 @@ def comments(self): | |
|
||
|
||
class UnstructuredTokenList(TokenList): | ||
|
||
token_type = 'unstructured' | ||
|
||
|
||
class Phrase(TokenList): | ||
|
||
token_type = 'phrase' | ||
|
||
class Word(TokenList): | ||
|
||
token_type = 'word' | ||
|
||
|
||
class CFWSList(WhiteSpaceTokenList): | ||
|
||
token_type = 'cfws' | ||
|
||
|
||
class Atom(TokenList): | ||
|
||
token_type = 'atom' | ||
|
||
|
||
class Token(TokenList): | ||
|
||
token_type = 'token' | ||
encode_as_ew = False | ||
|
||
|
||
class EncodedWord(TokenList): | ||
|
||
token_type = 'encoded-word' | ||
cte = None | ||
charset = None | ||
|
@@ -496,16 +489,19 @@ def domain(self): | |
|
||
|
||
class DotAtom(TokenList): | ||
|
||
token_type = 'dot-atom' | ||
|
||
|
||
class DotAtomText(TokenList): | ||
|
||
token_type = 'dot-atom-text' | ||
as_ew_allowed = True | ||
|
||
|
||
class NoFoldLiteral(TokenList): | ||
token_type = 'no-fold-literal' | ||
as_ew_allowed = False | ||
|
||
|
||
class AddrSpec(TokenList): | ||
|
||
token_type = 'addr-spec' | ||
|
@@ -809,35 +805,42 @@ def params(self): | |
|
||
|
||
class ContentType(ParameterizedHeaderValue): | ||
|
||
token_type = 'content-type' | ||
as_ew_allowed = False | ||
maintype = 'text' | ||
subtype = 'plain' | ||
|
||
|
||
class ContentDisposition(ParameterizedHeaderValue): | ||
|
||
token_type = 'content-disposition' | ||
as_ew_allowed = False | ||
content_disposition = None | ||
|
||
|
||
class ContentTransferEncoding(TokenList): | ||
|
||
token_type = 'content-transfer-encoding' | ||
as_ew_allowed = False | ||
cte = '7bit' | ||
|
||
|
||
class HeaderLabel(TokenList): | ||
|
||
token_type = 'header-label' | ||
as_ew_allowed = False | ||
|
||
|
||
class Header(TokenList): | ||
class MsgID(TokenList): | ||
token_type = 'msg-id' | ||
as_ew_allowed = False | ||
|
||
def fold(self, policy): | ||
# message-id tokens may not be folded. | ||
return str(self) + policy.linesep | ||
|
||
class MessageID(MsgID): | ||
token_type = 'message-id' | ||
|
||
|
||
class Header(TokenList): | ||
token_type = 'header' | ||
|
||
|
||
|
@@ -1583,7 +1586,7 @@ def get_addr_spec(value): | |
addr_spec.append(token) | ||
if not value or value[0] != '@': | ||
addr_spec.defects.append(errors.InvalidHeaderDefect( | ||
"add-spec local part with no domain")) | ||
"addr-spec local part with no domain")) | ||
return addr_spec, value | ||
addr_spec.append(ValueTerminal('@', 'address-at-symbol')) | ||
token, value = get_domain(value[1:]) | ||
|
@@ -1968,6 +1971,110 @@ def get_address_list(value): | |
value = value[1:] | ||
return address_list, value | ||
|
||
|
||
def get_no_fold_literal(value): | ||
""" no-fold-literal = "[" *dtext "]" | ||
""" | ||
no_fold_literal = NoFoldLiteral() | ||
if not value: | ||
raise errors.HeaderParseError( | ||
"expected no-fold-literal but found '{}'".format(value)) | ||
if value[0] != '[': | ||
raise errors.HeaderParseError( | ||
"expected '[' at the start of no-fold-literal " | ||
"but found '{}'".format(value)) | ||
no_fold_literal.append(ValueTerminal('[', 'no-fold-literal-start')) | ||
value = value[1:] | ||
token, value = get_dtext(value) | ||
no_fold_literal.append(token) | ||
if not value or value[0] != ']': | ||
raise errors.HeaderParseError( | ||
"expected ']' at the end of no-fold-literal " | ||
"but found '{}'".format(value)) | ||
no_fold_literal.append(ValueTerminal(']', 'no-fold-literal-end')) | ||
return no_fold_literal, value[1:] | ||
|
||
def get_msg_id(value): | ||
"""msg-id = [CFWS] "<" id-left '@' id-right ">" [CFWS] | ||
id-left = dot-atom-text / obs-id-left | ||
id-right = dot-atom-text / no-fold-literal / obs-id-right | ||
no-fold-literal = "[" *dtext "]" | ||
""" | ||
msg_id = MsgID() | ||
if value[0] in CFWS_LEADER: | ||
token, value = get_cfws(value) | ||
msg_id.append(token) | ||
if not value or value[0] != '<': | ||
raise errors.HeaderParseError( | ||
"expected msg-id but found '{}'".format(value)) | ||
msg_id.append(ValueTerminal('<', 'msg-id-start')) | ||
value = value[1:] | ||
# Parse id-left. | ||
try: | ||
token, value = get_dot_atom_text(value) | ||
except errors.HeaderParseError: | ||
try: | ||
# obs-id-left is same as local-part of add-spec. | ||
token, value = get_obs_local_part(value) | ||
msg_id.defects.append(errors.ObsoleteHeaderDefect( | ||
"obsolete id-left in msg-id")) | ||
except errors.HeaderParseError: | ||
raise errors.HeaderParseError( | ||
"expected dot-atom-text or obs-id-left" | ||
" but found '{}'".format(value)) | ||
msg_id.append(token) | ||
if not value or value[0] != '@': | ||
msg_id.defects.append(errors.InvalidHeaderDefect( | ||
"msg-id with no id-right")) | ||
# Even though there is no id-right, if the local part | ||
# ends with `>` let's just parse it too and return | ||
# along with the defect. | ||
if value and value[0] == '>': | ||
msg_id.append(ValueTerminal('>', 'msg-id-end')) | ||
value = value[1:] | ||
return msg_id, value | ||
msg_id.append(ValueTerminal('@', 'address-at-symbol')) | ||
value = value[1:] | ||
# Parse id-right. | ||
try: | ||
token, value = get_dot_atom_text(value) | ||
except errors.HeaderParseError: | ||
try: | ||
token, value = get_no_fold_literal(value) | ||
except errors.HeaderParseError as e: | ||
try: | ||
token, value = get_domain(value) | ||
msg_id.defects.append(errors.ObsoleteHeaderDefect( | ||
"obsolete id-right in msg-id")) | ||
except errors.HeaderParseError: | ||
raise errors.HeaderParseError( | ||
"expected dot-atom-text, no-fold-literal or obs-id-right" | ||
" but found '{}'".format(value)) | ||
msg_id.append(token) | ||
if value and value[0] == '>': | ||
value = value[1:] | ||
else: | ||
msg_id.defects.append(errors.InvalidHeaderDefect( | ||
"missing trailing '>' on msg-id")) | ||
msg_id.append(ValueTerminal('>', 'msg-id-end')) | ||
if value and value[0] in CFWS_LEADER: | ||
token, value = get_cfws(value) | ||
msg_id.append(token) | ||
return msg_id, value | ||
|
||
|
||
def parse_message_id(value): | ||
"""message-id = "Message-ID:" msg-id CRLF | ||
""" | ||
message_id = MessageID() | ||
try: | ||
token, value = get_msg_id(value) | ||
except errors.HeaderParseError: | ||
message_id.defects.append(errors.InvalidHeaderDefect( | ||
"Expected msg-id but found {!r}".format(value))) | ||
message_id.append(token) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This appears to bomb out when building hyper kitty, because token is referenced before assignment, when
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, same for me. Also, accessing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @surkova Thank you! I think this should be fixed up properly. Opened BPO https://bugs.python.org/issue38708 |
||
return message_id | ||
|
||
# | ||
# XXX: As I begin to add additional header parsers, I'm realizing we probably | ||
# have two level of parser routines: the get_XXX methods that get a token in | ||
|
Uh oh!
There was an error while loading. Please reload this page.