From 9da6ddfd98454f2c287d46396e3b1a2563ffd7a1 Mon Sep 17 00:00:00 2001 From: Abhilash Raj Date: Sun, 15 Dec 2019 18:47:55 -0800 Subject: [PATCH 1/4] bpo-39040: Fix parsing of email headers with whitespace between encoded-words. In certain malformed content-disposition headers, parameter values are quoted and split as encoded words on two lines with extra whitespace. This fixes the issue by removing the extra whitespace between the two encoded words. --- Lib/email/_header_value_parser.py | 11 +++++++++++ Lib/test/test_email/test_headerregistry.py | 10 ++++++++++ .../Library/2019-12-15-18-47-20.bpo-39040.tKa0Qs.rst | 2 ++ 3 files changed, 23 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2019-12-15-18-47-20.bpo-39040.tKa0Qs.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 9c55ef7fb453be..a9241116e67293 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1218,12 +1218,23 @@ def get_bare_quoted_string(value): if value[0] in WSP: token, value = get_fws(value) elif value[:2] == '=?': + valid_ew = False try: token, value = get_encoded_word(value) bare_quoted_string.defects.append(errors.InvalidHeaderDefect( "encoded word inside quoted string")) + valid_ew = True except errors.HeaderParseError: token, value = get_qcontent(value) + + # Collapse the whitespace between two encoded words that occur in a + # bare-quoted-string. 
+ if valid_ew and len(bare_quoted_string) > 1: + if (bare_quoted_string[-1].token_type == 'fws' and + bare_quoted_string[-2].token_type == 'encoded-word'): + + bare_quoted_string[-1] = EWWhiteSpaceTerminal( + bare_quoted_string[-1], 'fws') else: token, value = get_qcontent(value) bare_quoted_string.append(token) diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index 38f7ddbf06d5cd..d2e8ff67a365da 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -873,6 +873,16 @@ def content_disp_as_value(self, {'filename': 'foo'}, [errors.InvalidHeaderDefect]), + 'invalid_value_with_fws_bw_ew': ( + 'attachment; filename="=?UTF-8?Q?Schulbesuchsbest=C3=A4ttigung=2E?=' + ' =?UTF-8?Q?pdf?="', + 'attachment', + {'filename': 'Schulbesuchsbestättigung.pdf'}, + [errors.InvalidHeaderDefect]*3, + ('attachment; filename="Schulbesuchsbestättigung.pdf"'), + ('Content-Disposition: attachment;\n' + ' filename*=utf-8\'\'Schulbesuchsbest%C3%A4ttigung.pdf\n'), + ) } diff --git a/Misc/NEWS.d/next/Library/2019-12-15-18-47-20.bpo-39040.tKa0Qs.rst b/Misc/NEWS.d/next/Library/2019-12-15-18-47-20.bpo-39040.tKa0Qs.rst new file mode 100644 index 00000000000000..1e3a69afb5eeca --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-12-15-18-47-20.bpo-39040.tKa0Qs.rst @@ -0,0 +1,2 @@ +Fix parsing of invalid Content-Disposition email headers by collapsing +whitespace between encoded words in a bare-quote-string. From bf2cb76009d72869d9df6550b473b5818ceab311 Mon Sep 17 00:00:00 2001 From: Abhilash Raj Date: Mon, 16 Dec 2019 19:28:43 -0800 Subject: [PATCH 2/4] Remove empty lines, add a new test and update the news entry. 
--- Lib/email/_header_value_parser.py | 2 -- Lib/test/test_email/test_headerregistry.py | 12 +++++++++++- .../Library/2019-12-15-18-47-20.bpo-39040.tKa0Qs.rst | 4 ++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index a9241116e67293..51d355fbb0abc5 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1226,13 +1226,11 @@ def get_bare_quoted_string(value): valid_ew = True except errors.HeaderParseError: token, value = get_qcontent(value) - # Collapse the whitespace between two encoded words that occur in a # bare-quoted-string. if valid_ew and len(bare_quoted_string) > 1: if (bare_quoted_string[-1].token_type == 'fws' and bare_quoted_string[-2].token_type == 'encoded-word'): - bare_quoted_string[-1] = EWWhiteSpaceTerminal( bare_quoted_string[-1], 'fws') else: diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index d2e8ff67a365da..0d4c1b8b2fa5ca 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -873,7 +873,7 @@ def content_disp_as_value(self, {'filename': 'foo'}, [errors.InvalidHeaderDefect]), - 'invalid_value_with_fws_bw_ew': ( + 'invalid_parameter_value_with_fws_between_ew': ( 'attachment; filename="=?UTF-8?Q?Schulbesuchsbest=C3=A4ttigung=2E?=' ' =?UTF-8?Q?pdf?="', 'attachment', @@ -882,6 +882,16 @@ def content_disp_as_value(self, ('attachment; filename="Schulbesuchsbestättigung.pdf"'), ('Content-Disposition: attachment;\n' ' filename*=utf-8\'\'Schulbesuchsbest%C3%A4ttigung.pdf\n'), + ), + + 'parameter_value_with_fws_between_tokens': ( + 'attachment; filename="File Name With Spaces.pdf"', + 'attachment', + {'filename': 'File Name With Spaces.pdf'}, + [], + 'attachment; filename="File Name With Spaces.pdf"', + ('Content-Disposition: attachment; ' + 'filename="File Name With Spaces.pdf"\n'), ) } diff --git 
a/Misc/NEWS.d/next/Library/2019-12-15-18-47-20.bpo-39040.tKa0Qs.rst b/Misc/NEWS.d/next/Library/2019-12-15-18-47-20.bpo-39040.tKa0Qs.rst index 1e3a69afb5eeca..078bce22be30f0 100644 --- a/Misc/NEWS.d/next/Library/2019-12-15-18-47-20.bpo-39040.tKa0Qs.rst +++ b/Misc/NEWS.d/next/Library/2019-12-15-18-47-20.bpo-39040.tKa0Qs.rst @@ -1,2 +1,2 @@ -Fix parsing of invalid Content-Disposition email headers by collapsing -whitespace between encoded words in a bare-quote-string. +Fix parsing of invalid MIME header parameters by collapsing whitespace between +encoded words in a bare-quoted-string. From 004f37d7caf7cc0da0bb02062be391045cffba42 Mon Sep 17 00:00:00 2001 From: Abhilash Raj Date: Tue, 17 Dec 2019 17:33:18 -0800 Subject: [PATCH 3/4] Update test to include spaces around an encoded word. --- Lib/test/test_email/test_headerregistry.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index 0d4c1b8b2fa5ca..3fc324703f263e 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -885,13 +885,13 @@ def content_disp_as_value(self, ), 'parameter_value_with_fws_between_tokens': ( - 'attachment; filename="File Name With Spaces.pdf"', + 'attachment; filename="=?utf-8?q?FileName= WithSpaces.pdf"', 'attachment', - {'filename': 'File Name With Spaces.pdf'}, + {'filename': '=?utf-8?q?FileName= WithSpaces.pdf'}, [], - 'attachment; filename="File Name With Spaces.pdf"', - ('Content-Disposition: attachment; ' - 'filename="File Name With Spaces.pdf"\n'), + 'attachment; filename="=?utf-8?q?FileName= WithSpaces.pdf"', + ('Content-Disposition: attachment;\n' + ' filename="=?utf-8?q?FileName= WithSpaces.pdf"\n'), ) } From 016ceb3ef00b3b940993d35d539ce63d68437d4f Mon Sep 17 00:00:00 2001 From: Abhilash Raj Date: Tue, 17 Dec 2019 17:37:41 -0800 Subject: [PATCH 4/4] Fix the encoded word syntax and resultant error. 
--- Lib/test/test_email/test_headerregistry.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index 3fc324703f263e..b569428908bfe6 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -885,13 +885,12 @@ def content_disp_as_value(self, ), 'parameter_value_with_fws_between_tokens': ( - 'attachment; filename="=?utf-8?q?FileName= WithSpaces.pdf"', + 'attachment; filename="File =?utf-8?q?Name?= With Spaces.pdf"', 'attachment', - {'filename': '=?utf-8?q?FileName= WithSpaces.pdf'}, - [], - 'attachment; filename="=?utf-8?q?FileName= WithSpaces.pdf"', - ('Content-Disposition: attachment;\n' - ' filename="=?utf-8?q?FileName= WithSpaces.pdf"\n'), + {'filename': 'File Name With Spaces.pdf'}, + [errors.InvalidHeaderDefect], + 'attachment; filename="File Name With Spaces.pdf"', + ('Content-Disposition: attachment; filename="File Name With Spaces.pdf"\n'), ) }